TABLE OF CONTENTS
- ABINIT/m_cgtools
- ABINIT/subdiago
- ABINIT/subdiago_low_memory
- m_cgtools/cg_addtorho
- m_cgtools/cg_box2gsph
- m_cgtools/cg_dznrm2
- m_cgtools/cg_envlop
- m_cgtools/cg_from_reim
- m_cgtools/cg_fromcplx
- m_cgtools/cg_get_eigens
- m_cgtools/cg_get_residvecs
- m_cgtools/cg_getspin
- m_cgtools/cg_gsph2box
- m_cgtools/cg_hprotate_and_get_diag
- m_cgtools/cg_hrotate_and_get_diag
- m_cgtools/cg_kfilter
- m_cgtools/cg_norm2g
- m_cgtools/cg_normev
- m_cgtools/cg_precon
- m_cgtools/cg_precon_block
- m_cgtools/cg_precon_many
- m_cgtools/cg_real_zdotc
- m_cgtools/cg_set_imag0_to_zero
- m_cgtools/cg_setaug_zero
- m_cgtools/cg_to_reim
- m_cgtools/cg_tocplx
- m_cgtools/cg_vlocpsi
- m_cgtools/cg_zaxpby
- m_cgtools/cg_zaxpy
- m_cgtools/cg_zaxpy_many_areal
- m_cgtools/cg_zcopy
- m_cgtools/cg_zdotc
- m_cgtools/cg_zdotg_zip
- m_cgtools/cg_zdotu
- m_cgtools/cg_zgemm
- m_cgtools/cg_zgemv
- m_cgtools/cg_zprecon_block
- m_cgtools/cg_zscal
- m_cgtools/cgnc_cholesky
- m_cgtools/cgnc_gramschmidt
- m_cgtools/cgnc_gsortho
- m_cgtools/cgnc_normalize
- m_cgtools/cgpaw_cholesky
- m_cgtools/cgpaw_gramschmidt
- m_cgtools/cgpaw_gsortho
- m_cgtools/cgpaw_normalize
- m_cgtools/dotprod_g
- m_cgtools/dotprod_v
- m_cgtools/dotprod_vn
- m_cgtools/fxphas_and_cmp
- m_cgtools/fxphas_seq
- m_cgtools/matrixelmt_g
- m_cgtools/mean_fftr
- m_cgtools/overlap_g
- m_cgtools/projbd
- m_cgtools/pw_orthon
- m_cgtools/pw_orthon_paw
- m_cgtools/set_istwfk
- m_cgtools/sqnorm_g
- m_cgtools/sqnorm_v
ABINIT/m_cgtools [ Modules ]
NAME
m_cgtools
FUNCTION
This module defines wrappers for BLAS routines. The arguments are stored using the "cg" convention, namely real array of shape cg(2,...)
COPYRIGHT
Copyright (C) 1992-2024 ABINIT group (MG, MT, XG, DCA, GZ, FB, MVer, DCA, GMR, FF) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
NOTES
1) The convention about names of interfaced routine is: cg_<name>, where <name> is equal to the name of the standard BLAS routine 2) Blas routines are called without an explicit interface on purpose since a) The compiler should pass the base address of the array to the F77 BLAS b) Any compiler would complain about type mismatch (REAL,COMPLEX) if an explicit interface is given. 3) The use of mpi_type is not allowed here. MPI parallelism should be handled in a generic way by passing the MPI communicator so that the caller can decide how to handle MPI.
ABINIT/subdiago [ Functions ]
NAME
subdiago
FUNCTION
This routine diagonalizes the Hamiltonian in the trial subspace.
INPUTS
icg=shift to be applied on the location of data in the array cg igsc=shift to be applied on the location of data in the array gsc istwf_k=input parameter that describes the storage of wfs mcg=second dimension of the cg array mgsc=second dimension of the gsc array nband_k=number of bands at this k point for that spin polarization npw_k=number of plane waves at this k point my_nspinor=number of spinorial components of the wavefunctions (on current proc) use_subovl=1 if the overlap matrix is not identity in WFs subspace usepaw= 0 for non paw calculation; =1 for paw calculation me_g0=1 if this processor has G=0, 0 otherwise.
OUTPUT
eig_k(nband_k)=array for holding eigenvalues (hartree) evec(2*nband_k,nband_k)=array for holding eigenvectors
SIDE EFFECTS
subham(nband_k*(nband_k+1))=Hamiltonian expressed in the WFs subspace. Hermitianized in output. subovl(nband_k*(nband_k+1)*use_subovl)=overlap matrix expressed in the WFs subspace. Hermitianized in output. cg(2,mcg)=wavefunctions gsc(2,mgsc)=<g|S|c> matrix elements (S=overlap)
SOURCE
4304 subroutine subdiago(cg, eig_k, evec, gsc, icg, igsc, istwf_k, mcg, mgsc, nband_k, npw_k, my_nspinor, paral_kgb, & 4305 subham, subovl, use_subovl, usepaw, me_g0) 4306 4307 use m_linalg_interfaces 4308 use m_abi_linalg 4309 4310 !Arguments ------------------------------------ 4311 integer,intent(in) :: icg,igsc,istwf_k,mcg,mgsc,nband_k,npw_k,me_g0 4312 integer,intent(in) :: my_nspinor,paral_kgb,use_subovl,usepaw 4313 real(dp),intent(inout) :: subham(nband_k*(nband_k+1)),subovl(nband_k*(nband_k+1)*use_subovl) 4314 real(dp),intent(out) :: eig_k(nband_k),evec(2*nband_k,nband_k) 4315 real(dp),intent(inout) :: cg(2,mcg),gsc(2,mgsc) 4316 4317 !Local variables------------------------------- 4318 integer :: iband,ii,ierr,rvectsize,vectsize,use_slk 4319 !real(dp) :: cpu, wall, gflops 4320 character(len=500) :: msg 4321 ! real(dp) :: tsec(2) 4322 real(dp),allocatable :: evec_re(:,:),subovl_re(:),subham_tmp(:), work(:,:) 4323 real(dp),allocatable :: blockvectora(:,:),blockvectorb(:,:),blockvectorc(:,:) 4324 4325 ! ********************************************************************* 4326 4327 if (paral_kgb<0) then 4328 ABI_BUG('paral_kgb should be positive ') 4329 end if 4330 4331 ! 1 if Scalapack version is used. 4332 ! MG TODO: This should not be bound to paral_kgb 4333 use_slk = paral_kgb 4334 4335 rvectsize=npw_k*my_nspinor 4336 vectsize=2*rvectsize;if (me_g0==1) vectsize=vectsize-1 4337 !call cwtime(cpu, wall, gflops, "start") 4338 4339 !Impose Hermiticity on diagonal elements of subham (and subovl, if needed) 4340 ! MG FIXME: In these two calls we are aliasing the args 4341 call hermit(subham, subham, ierr, nband_k) 4342 if (use_subovl==1) call hermit(subovl, subovl, ierr, nband_k) 4343 !call cwtime_report(" hermit", cpu, wall, gflops) 4344 4345 ! Diagonalize the Hamitonian matrix 4346 if (istwf_k==2) then 4347 ABI_CALLOC(evec_re, (nband_k,nband_k)) 4348 ABI_MALLOC(subham_tmp, (nband_k*(nband_k+1)/2)) 4349 subham_tmp=subham(1:nband_k*(nband_k+1):2) 4350 if (use_subovl==1) then 4351 ABI_MALLOC(subovl_re, (nband_k*(nband_k+1)/2)) 4352 subovl_re=subovl(1:nband_k*(nband_k+1):2) 4353 ! TODO: Not sure this one has been fully tested 4354 call abi_xhpgv(1,'V','U',nband_k,subham_tmp,subovl_re,eig_k,evec_re,nband_k,istwf_k=istwf_k,use_slk=use_slk) 4355 ABI_FREE(subovl_re) 4356 else 4357 call abi_xhpev('V','U',nband_k,subham_tmp,eig_k,evec_re,nband_k,istwf_k=istwf_k,use_slk=use_slk) 4358 end if 4359 evec(:,:)=zero; evec(1:2*nband_k:2,:) = evec_re 4360 ABI_FREE(evec_re) 4361 ABI_FREE(subham_tmp) 4362 else 4363 if (use_subovl==1) then 4364 call abi_xhpgv(1,'V','U',nband_k,subham,subovl,eig_k,evec,nband_k,istwf_k=istwf_k,use_slk=use_slk) 4365 else 4366 call abi_xhpev('V','U',nband_k,subham,eig_k,evec,nband_k,istwf_k=istwf_k,use_slk=use_slk) 4367 end if 4368 end if 4369 !call cwtime_report(" hdiago", cpu, wall, gflops) 4370 4371 ! Normalize each eigenvector and set phase: 4372 ! this is because of the simultaneous diagonalisation of this 4373 ! matrix by different processors, allowing to get different unitary transforms, thus breaking the 4374 ! coherency of parts of cg stored on different processors). 4375 ! 4376 ! The problem with minus/plus signs might be present also if .not. use_subovl 4377 ! 4378 !if(use_subovl == 0) then 4379 call cg_normev(evec, nband_k, nband_k) 4380 !end if 4381 4382 if(istwf_k==2)then 4383 do iband=1,nband_k 4384 do ii=1,nband_k 4385 if(abs(evec(2*ii,iband))>1.0d-10)then 4386 write(msg,'(3a,2i0,2es16.6,a,a)')ch10,& 4387 ' For istwf_k=2, observed the following element of evec:',ch10,& 4388 iband,ii,evec(2*ii-1,iband),evec(2*ii,iband),ch10,' with a non-negligible imaginary part.' 4389 ABI_BUG(msg) 4390 end if 4391 end do 4392 end do 4393 end if 4394 !call cwtime_report(" normev", cpu, wall, gflops) 4395 4396 !===================================================== 4397 ! Carry out rotation of bands C(G,n) according to evecs 4398 ! ZGEMM if istwfk==1, DGEMM if istwfk==2 4399 !===================================================== 4400 if (istwf_k==2) then 4401 4402 ABI_MALLOC_OR_DIE(blockvectora, (vectsize, nband_k), ierr) 4403 ABI_MALLOC_OR_DIE(blockvectorb, (nband_k, nband_k), ierr) 4404 ABI_MALLOC_OR_DIE(blockvectorc, (vectsize, nband_k), ierr) 4405 4406 do iband=1,nband_k 4407 if (me_g0 == 1) then 4408 call abi_xcopy(1,cg(1,cgindex_subd(iband)),1,blockvectora(1,iband),1) 4409 call abi_xcopy(rvectsize-1,cg(1,cgindex_subd(iband)+1),2,blockvectora(2,iband),1) 4410 call abi_xcopy(rvectsize-1,cg(2,cgindex_subd(iband)+1),2,blockvectora(rvectsize+1,iband),1) 4411 else 4412 call abi_xcopy(rvectsize,cg(1,cgindex_subd(iband)),2,blockvectora(1,iband),1) 4413 call abi_xcopy(rvectsize,cg(2,cgindex_subd(iband)),2,blockvectora(rvectsize+1,iband),1) 4414 end if 4415 call abi_xcopy(nband_k,evec(2*iband-1,1),2*nband_k,blockvectorb(iband,1),nband_k) 4416 end do 4417 4418 !MG TODO: This one is a DGEMM. 4419 call abi_xgemm('N','N',vectsize,nband_k,nband_k,& 4420 cone,blockvectora,vectsize,blockvectorb,nband_k,czero,blockvectorc,vectsize) 4421 4422 do iband=1,nband_k 4423 if (me_g0 == 1) then 4424 call abi_xcopy(1,blockvectorc(1,iband),1,cg(1,cgindex_subd(iband)),1) 4425 call abi_xcopy(rvectsize-1,blockvectorc(2,iband),1,cg(1,cgindex_subd(iband)+1),2) 4426 call abi_xcopy(rvectsize-1,blockvectorc(rvectsize+1,iband),1,cg(2,cgindex_subd(iband)+1),2) 4427 else 4428 call abi_xcopy(rvectsize,blockvectorc(1,iband),1,cg(1,cgindex_subd(iband)),2) 4429 call abi_xcopy(rvectsize,blockvectorc(rvectsize+1,iband),1,cg(2,cgindex_subd(iband)),2) 4430 end if 4431 end do 4432 4433 if (usepaw==1) then 4434 ! If paw, must also rotate S.C(G,n): 4435 4436 do iband=1,nband_k 4437 if (me_g0 == 1) then 4438 call abi_xcopy(1,gsc(1,gscindex_subd(iband)),1,blockvectora(1,iband),1) 4439 call abi_xcopy(rvectsize-1,gsc(1,gscindex_subd(iband)+1),2,blockvectora(2,iband),1) 4440 call abi_xcopy(rvectsize-1,gsc(2,gscindex_subd(iband)+1),2,blockvectora(rvectsize+1,iband),1) 4441 else 4442 call abi_xcopy(rvectsize ,gsc(1,gscindex_subd(iband)),2,blockvectora(1,iband),1) 4443 call abi_xcopy(rvectsize ,gsc(2,gscindex_subd(iband)),2,blockvectora(rvectsize+1,iband),1) 4444 end if 4445 call abi_xcopy(nband_k,evec(2*iband-1,1),2*nband_k,blockvectorb(iband,1),nband_k) 4446 end do 4447 4448 call abi_xgemm('N','N',vectsize,nband_k,nband_k,& 4449 cone,blockvectora,vectsize,blockvectorb,nband_k,czero,blockvectorc,vectsize) 4450 4451 do iband=1,nband_k 4452 if (me_g0 == 1) then 4453 call abi_xcopy(1,blockvectorc(1,iband),1,gsc(1,gscindex_subd(iband)),1) 4454 call abi_xcopy(rvectsize-1,blockvectorc(2,iband),1,gsc(1,gscindex_subd(iband)+1),2) 4455 call abi_xcopy(rvectsize-1,blockvectorc(rvectsize+1,iband),1,gsc(2,gscindex_subd(iband)+1),2) 4456 else 4457 call abi_xcopy(rvectsize,blockvectorc(1,iband),1,gsc(1,gscindex_subd(iband)),2) 4458 call abi_xcopy(rvectsize,blockvectorc(rvectsize+1,iband),1,gsc(2,gscindex_subd(iband)),2) 4459 end if 4460 end do 4461 4462 end if 4463 4464 ABI_FREE(blockvectora) 4465 ABI_FREE(blockvectorb) 4466 ABI_FREE(blockvectorc) 4467 4468 else 4469 ! istwf_k /= 2 4470 ABI_MALLOC_OR_DIE(work, (2,npw_k*my_nspinor*nband_k), ierr) 4471 4472 ! MG: Do not remove this initialization. 4473 ! telast_06 stops in fxphase on inca_debug and little_buda (very very strange, due to atlas?) 4474 !work=zero 4475 4476 call abi_xgemm('N','N',npw_k*my_nspinor,nband_k,nband_k,cone, & 4477 cg(:,icg+1:npw_k*my_nspinor*nband_k+icg),npw_k*my_nspinor, & 4478 evec,nband_k,czero,work,npw_k*my_nspinor,x_cplx=2) 4479 4480 call abi_xcopy(npw_k*my_nspinor*nband_k,work(1,1),1,cg(1,1+icg),1,x_cplx=2) 4481 4482 if (usepaw==1) then 4483 ! If paw, must also rotate S.C(G,n): 4484 call abi_xgemm('N','N',npw_k*my_nspinor,nband_k,nband_k,cone, & 4485 gsc(:,1+igsc:npw_k*my_nspinor*nband_k+igsc),npw_k*my_nspinor, & 4486 evec,nband_k,czero,work,npw_k*my_nspinor,x_cplx=2) 4487 call abi_xcopy(npw_k*my_nspinor*nband_k, work(1,1),1,gsc(1,1+igsc),1,x_cplx=2) 4488 end if 4489 4490 ABI_FREE(work) 4491 end if 4492 !call cwtime_report(" rotation", cpu, wall, gflops) 4493 4494 contains 4495 4496 function cgindex_subd(iband) 4497 integer :: iband,cgindex_subd 4498 cgindex_subd=npw_k*my_nspinor*(iband-1)+icg+1 4499 end function cgindex_subd 4500 4501 function gscindex_subd(iband) 4502 integer :: iband,gscindex_subd 4503 gscindex_subd=npw_k*my_nspinor*(iband-1)+igsc+1 4504 end function gscindex_subd 4505 4506 end subroutine subdiago
ABINIT/subdiago_low_memory [ Functions ]
NAME
subdiago_low_memory
FUNCTION
This routine diagonalizes the Hamiltonian in the eigenfunction subspace Separate the computation in blocks of plane waves to save memory
INPUTS
icg=shift to be applied on the location of data in the array cg istwf_k=input parameter that describes the storage of wfs mcg=second dimension of the cg array nband_k=number of bands at this k point for that spin polarization npw_k=number of plane waves at this k point nspinor=number of spinorial components of the wavefunctions (on current proc) subham(nband_k*(nband_k+1))=Hamiltonian expressed in the WFs subspace
OUTPUT
eig_k(nband_k)=array for holding eigenvalues (hartree) evec(2*nband_k,nband_k)=array for holding eigenvectors
SIDE EFFECTS
cg(2,mcg)=wavefunctions
SOURCE
4535 subroutine subdiago_low_memory(cg,eig_k,evec,icg,istwf_k,& 4536 & mcg,nband_k,npw_k,nspinor,paral_kgb,& 4537 & subham) 4538 4539 use m_linalg_interfaces 4540 use m_abi_linalg 4541 4542 !Arguments ------------------------------------ 4543 integer,intent(in) :: icg,istwf_k,mcg,nband_k,npw_k 4544 integer,intent(in) :: nspinor,paral_kgb 4545 real(dp),intent(inout) :: subham(nband_k*(nband_k+1)) 4546 real(dp),intent(out) :: eig_k(nband_k),evec(2*nband_k,nband_k) 4547 real(dp),intent(inout),target :: cg(2,mcg) 4548 4549 !Local variables------------------------------- 4550 integer :: ig,igfirst,block_size,iblock,nblock,block_size_tmp,wfsize 4551 integer :: iband,ii,ierr,vectsize,use_slk 4552 character(len=500) :: message 4553 ! real(dp) :: tsec(2) 4554 real(dp),allocatable :: evec_tmp(:,:),subham_tmp(:) 4555 real(dp),allocatable :: work(:,:) 4556 real(dp),allocatable :: blockvectora(:,:),blockvectorb(:,:),blockvectorc(:,:) 4557 real(dp),pointer :: cg_block(:,:) 4558 4559 ! ********************************************************************* 4560 4561 if (paral_kgb<0) then 4562 ABI_BUG('paral_kgb should be positive ') 4563 end if 4564 4565 ! 1 if Scalapack version is used. 4566 use_slk = paral_kgb 4567 4568 !Impose Hermiticity on diagonal elements of subham (and subovl, if needed) 4569 ! MG FIXME: In these two calls we are aliasing the args 4570 call hermit(subham,subham,ierr,nband_k) 4571 4572 !Diagonalize the Hamitonian matrix 4573 if(istwf_k==2) then 4574 ABI_MALLOC(evec_tmp,(nband_k,nband_k)) 4575 ABI_MALLOC(subham_tmp,(nband_k*(nband_k+1)/2)) 4576 subham_tmp=subham(1:nband_k*(nband_k+1):2) 4577 evec_tmp=zero 4578 call abi_xhpev('V','U',nband_k,subham_tmp,eig_k,evec_tmp,nband_k,istwf_k=istwf_k,use_slk=use_slk) 4579 evec(:,:)=zero;evec(1:2*nband_k:2,:) =evec_tmp 4580 ABI_FREE(evec_tmp) 4581 ABI_FREE(subham_tmp) 4582 else 4583 call abi_xhpev('V','U',nband_k,subham,eig_k,evec,nband_k,istwf_k=istwf_k,use_slk=use_slk) 4584 end if 4585 4586 !Normalize each eigenvector and set phase: 4587 !The problem with minus/plus signs might be present also if .not. use_subovl 4588 !if(use_subovl == 0) then 4589 call cg_normev(evec,nband_k,nband_k) 4590 !end if 4591 4592 if(istwf_k==2)then 4593 do iband=1,nband_k 4594 do ii=1,nband_k 4595 if(abs(evec(2*ii,iband))>1.0d-10)then 4596 write(message,'(3a,2i0,2es16.6,a,a)')ch10,& 4597 & ' subdiago: For istwf_k=2, observed the following element of evec :',ch10,& 4598 & iband,ii,evec(2*ii-1,iband),evec(2*ii,iband),ch10,' with a non-negligible imaginary part.' 4599 ABI_BUG(message) 4600 end if 4601 end do 4602 end do 4603 end if 4604 4605 !===================================================== 4606 !Carry out rotation of bands C(G,n) according to evecs 4607 ! ZGEMM if istwfk==1, DGEMM if istwfk==2 4608 !===================================================== 4609 wfsize=npw_k*nspinor 4610 4611 block_size=100 4612 4613 if (wfsize<block_size) block_size=wfsize 4614 4615 nblock=wfsize/block_size 4616 if (mod(wfsize,block_size)/=0) nblock=nblock+1 4617 4618 if (istwf_k>1) then ! evec is real 4619 4620 vectsize=2*block_size 4621 4622 ABI_MALLOC_OR_DIE(blockvectora,(vectsize,nband_k), ierr) 4623 ABI_MALLOC_OR_DIE(blockvectorb,(nband_k,nband_k), ierr) 4624 ABI_MALLOC_OR_DIE(blockvectorc,(vectsize,nband_k), ierr) 4625 4626 do iband=1,nband_k 4627 call abi_xcopy(nband_k,evec(2*iband-1,1),2*nband_k,blockvectorb(iband,1),nband_k) 4628 end do 4629 4630 do iblock=1,nblock 4631 4632 igfirst=(iblock-1)*block_size 4633 block_size_tmp=block_size 4634 if (igfirst+block_size>wfsize) then 4635 block_size_tmp=wfsize-igfirst 4636 end if 4637 4638 do iband=1,nband_k 4639 call abi_xcopy(block_size_tmp,cg(1,1+cgindex_subd(iblock,iband)),2,blockvectora(1,iband),1) 4640 call abi_xcopy(block_size_tmp,cg(2,1+cgindex_subd(iblock,iband)),2,blockvectora(block_size+1,iband),1) 4641 if (block_size_tmp<block_size) then 4642 blockvectora(block_size_tmp+1:block_size,iband) = zero 4643 blockvectora(block_size+block_size_tmp+1:2*block_size,iband) = zero 4644 end if 4645 end do 4646 4647 call abi_xgemm('N','N',vectsize,nband_k,nband_k,& 4648 & cone,blockvectora,vectsize,blockvectorb,nband_k,czero,blockvectorc,vectsize) 4649 4650 do iband=1,nband_k 4651 call abi_xcopy(block_size_tmp,blockvectorc(1,iband),1,cg(1,1+cgindex_subd(iblock,iband)),2) 4652 call abi_xcopy(block_size_tmp,blockvectorc(block_size+1,iband),1,cg(2,1+cgindex_subd(iblock,iband)),2) 4653 end do 4654 4655 end do 4656 4657 ABI_FREE(blockvectora) 4658 ABI_FREE(blockvectorb) 4659 ABI_FREE(blockvectorc) 4660 4661 else ! evec is complex 4662 4663 ABI_MALLOC_OR_DIE(work,(2,block_size*nband_k), ierr) 4664 if (nblock==1) then 4665 cg_block => cg(:,icg+1:icg+nband_k*wfsize) 4666 else 4667 ABI_MALLOC_OR_DIE(cg_block,(2,block_size*nband_k), ierr) 4668 end if 4669 4670 do iblock=1,nblock 4671 igfirst=(iblock-1)*block_size 4672 block_size_tmp=block_size 4673 if (igfirst+block_size>wfsize) then 4674 block_size_tmp=wfsize-igfirst 4675 end if 4676 if (nblock/=1) then 4677 do iband=1,nband_k 4678 do ig=1,block_size_tmp 4679 cg_block(:,ig+(iband-1)*block_size) = cg(:,ig+cgindex_subd(iblock,iband)) 4680 end do 4681 if (block_size_tmp<block_size) then 4682 do ig=block_size_tmp+1,block_size 4683 cg_block(:,ig+(iband-1)*block_size) = zero 4684 end do 4685 end if 4686 end do 4687 end if 4688 call abi_xgemm('N','N',block_size,nband_k,nband_k,cone,cg_block,block_size,evec,nband_k,czero,work,& 4689 & block_size,x_cplx=2) 4690 do iband=1,nband_k 4691 do ig=1,block_size_tmp 4692 cg(:,ig+cgindex_subd(iblock,iband)) = work(:,ig+(iband-1)*block_size) 4693 end do 4694 end do 4695 end do 4696 4697 ABI_FREE(work) 4698 if (nblock/=1) then 4699 ABI_FREE(cg_block) 4700 end if 4701 4702 end if 4703 4704 contains 4705 4706 function cgindex_subd(iblock,iband) 4707 4708 integer :: iband,iblock,cgindex_subd 4709 cgindex_subd=(iblock-1)*block_size+(iband-1)*wfsize+icg 4710 end function cgindex_subd 4711 4712 end subroutine subdiago_low_memory
m_cgtools/cg_addtorho [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_addtorho
FUNCTION
Add |ur|**2 to the ground-states density rho. rho = rho + weight_r * Re[ur]**2 + weight_i * Im[ur]**2
INPUTS
nx,ny,nz=physical dimension of the FFT box. ldx,ldy,ldz=leading dimensions of the arrays. ndat=number of contributions to accumulate. weight_r=weight used for the accumulation of the density in real space weight_i=weight used for the accumulation of the density in real space ur(2,ldx,ldy,ldz*ndat)=wavefunctions in real space
SIDE EFFECTS
rho(ldx,ldy,ldz) = contains the input density at input, modified in input with the contribution gived by ur.
SOURCE
2072 subroutine cg_addtorho(nx,ny,nz,ldx,ldy,ldz,ndat,weight_r,weight_i,ur,rho) 2073 2074 !Arguments ------------------------------------ 2075 !scalars 2076 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat 2077 real(dp),intent(in) :: weight_i,weight_r 2078 !arrays 2079 real(dp),intent(in) :: ur(2,ldx,ldy,ldz*ndat) 2080 real(dp),intent(inout) :: rho(ldx,ldy,ldz) 2081 2082 !Local variables------------------------------- 2083 !scalars 2084 integer :: ix,iy,iz,idat,izdat 2085 2086 ! ************************************************************************* 2087 2088 if (ndat==1) then 2089 !$OMP PARALLEL DO 2090 do iz=1,nz 2091 do iy=1,ny 2092 do ix=1,nx 2093 rho(ix,iy,iz) = rho(ix,iy,iz) + weight_r * ur(1,ix,iy,iz)**2 & 2094 & + weight_i * ur(2,ix,iy,iz)**2 2095 end do 2096 end do 2097 end do 2098 2099 else 2100 ! It would be nice to use $OMP PARALLEL DO PRIVATE(izdat) REDUCTION(+:rho) 2101 ! but it's risky as the private rho is allocated on the stack of the thread. 2102 !$OMP PARALLEL PRIVATE(izdat) 2103 do idat=1,ndat 2104 !$OMP DO 2105 do iz=1,nz 2106 izdat = iz + (idat-1)*ldz 2107 do iy=1,ny 2108 do ix=1,nx 2109 rho(ix,iy,iz) = rho(ix,iy,iz) + weight_r * ur(1,ix,iy,izdat)**2 & 2110 & + weight_i * ur(2,ix,iy,izdat)**2 2111 end do 2112 end do 2113 end do 2114 !$OMP END DO NOWAIT 2115 end do 2116 !$OMP END PARALLEL 2117 end if 2118 2119 end subroutine cg_addtorho
m_cgtools/cg_box2gsph [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_box2gsph
FUNCTION
INPUTS
nx,ny,nz=physical dimension of the FFT box. ldx,ldy,ldz=Logical dimensions of the arrays. ndat=number of data in iarrbox npw_k=Number of planewaves in the G-sphere. kg_k(3,npw_k)=Reduced coordinates of the G-vectoes. iarrbox(2,ldx,ldy,ldz*ndat)=Input arrays on the FFT box. [rscal] = Scaling factor
OUTPUT
oarrsph(2,npw_k*ndat)=Data defined on the G-sphere.
SOURCE
1960 subroutine cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat,npw_k,kg_k,iarrbox,oarrsph,rscal) 1961 1962 !Arguments ------------------------------------ 1963 !scalars 1964 integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat 1965 real(dp),optional,intent(in) :: rscal 1966 !arrays 1967 integer,intent(in) :: kg_k(3,npw_k) 1968 real(dp),intent(in) :: iarrbox(2,ldx*ldy*ldz*ndat) 1969 real(dp),intent(out) :: oarrsph(2,npw_k*ndat) 1970 1971 !Local variables------------------------------- 1972 !scalars 1973 integer :: ig,ix,iy,iz,idat,sph_pad,box_pad,ifft 1974 1975 ! ************************************************************************* 1976 1977 if (.not. PRESENT(rscal)) then 1978 ! 1979 if (ndat==1) then 1980 !$OMP PARALLEL DO PRIVATE(ix,iy,iz,ifft) 1981 do ig=1,npw_k 1982 ix=kg_k(1,ig); if (ix<0) ix=ix+nx; ix=ix+1 1983 iy=kg_k(2,ig); if (iy<0) iy=iy+ny; iy=iy+1 1984 iz=kg_k(3,ig); if (iz<0) iz=iz+nz; iz=iz+1 1985 ifft = ix + (iy-1)*ldx + (iz-1)*ldx*ldy 1986 #if defined FC_NVHPC 1987 if (ifft<0) stop "prevent from miscompiling this section" 1988 #endif 1989 oarrsph(1,ig) = iarrbox(1,ifft) 1990 oarrsph(2,ig) = iarrbox(2,ifft) 1991 end do 1992 else 1993 !$OMP PARALLEL DO PRIVATE(sph_pad,box_pad,ix,iy,iz,ifft) 1994 do idat=1,ndat 1995 sph_pad = (idat-1)*npw_k 1996 box_pad = (idat-1)*ldx*ldy*ldz 1997 do ig=1,npw_k 1998 ix=kg_k(1,ig); if (ix<0) ix=ix+nx; ix=ix+1 1999 iy=kg_k(2,ig); if (iy<0) iy=iy+ny; iy=iy+1 2000 iz=kg_k(3,ig); if (iz<0) iz=iz+nz; iz=iz+1 2001 ifft = ix + (iy-1)*ldx + (iz-1)*ldx*ldy 2002 #if defined FC_NVHPC 2003 if (ifft<0) stop "prevent from miscompiling this section" 2004 #endif 2005 oarrsph(1,ig+sph_pad) = iarrbox(1,ifft+box_pad) 2006 oarrsph(2,ig+sph_pad) = iarrbox(2,ifft+box_pad) 2007 end do 2008 end do 2009 end if 2010 ! 2011 else 2012 if (ndat==1) then 2013 !$OMP PARALLEL DO PRIVATE(ix,iy,iz,ifft) 2014 do ig=1,npw_k 2015 ix=kg_k(1,ig); if (ix<0) ix=ix+nx; ix=ix+1 2016 iy=kg_k(2,ig); if (iy<0) iy=iy+ny; iy=iy+1 2017 iz=kg_k(3,ig); if (iz<0) iz=iz+nz; iz=iz+1 2018 ifft = ix + (iy-1)*ldx + (iz-1)*ldx*ldy 2019 #if defined FC_NVHPC 2020 if (ifft<0) stop "prevent from miscompiling this section" 2021 #endif 2022 oarrsph(1,ig) = iarrbox(1,ifft) * rscal 2023 oarrsph(2,ig) = iarrbox(2,ifft) * rscal 2024 end do 2025 else 2026 !$OMP PARALLEL DO PRIVATE(sph_pad,box_pad,ix,iy,iz,ifft) 2027 do idat=1,ndat 2028 sph_pad = (idat-1)*npw_k 2029 box_pad = (idat-1)*ldx*ldy*ldz 2030 do ig=1,npw_k 2031 ix=kg_k(1,ig); if (ix<0) ix=ix+nx; ix=ix+1 2032 iy=kg_k(2,ig); if (iy<0) iy=iy+ny; iy=iy+1 2033 iz=kg_k(3,ig); if (iz<0) iz=iz+nz; iz=iz+1 2034 ifft = ix + (iy-1)*ldx + (iz-1)*ldx*ldy 2035 #if defined FC_NVHPC 2036 if (ifft<0) stop "prevent from miscompiling this section" 2037 #endif 2038 oarrsph(1,ig+sph_pad) = iarrbox(1,ifft+box_pad) * rscal 2039 oarrsph(2,ig+sph_pad) = iarrbox(2,ifft+box_pad) * rscal 2040 end do 2041 end do 2042 end if 2043 end if 2044 2045 end subroutine cg_box2gsph
m_cgtools/cg_dznrm2 [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_dznrm2
FUNCTION
returns the euclidean norm of a vector via the function name, so that DZNRM2 := sqrt( x**H*x )
INPUTS
n = Specifies the number of elements in vector x. x(2*x) = Input array.
OUTPUT
SOURCE
490 function cg_dznrm2(n, x) result(res) 491 492 !Arguments ------------------------------------ 493 !scalars 494 integer,intent(in) :: n 495 real(dp) :: res 496 !arrays 497 real(dp),intent(in) :: x(2*n) 498 real(dp),external :: dznrm2 499 500 ! ************************************************************************* 501 502 res = dznrm2(n, x, 1) 503 504 end function cg_dznrm2
m_cgtools/cg_envlop [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_envlop
FUNCTION
Multiply random number values in cg by envelope function to lower initial kinetic energy. Envelope $\left( 1-\left( G/G_{\max }\right) ^2\right) ^{power}$ for |G|<= Gmax. Near G=0, little scaling, and goes to zero flatly near Gmax.
INPUTS
cg(2,mcg)=initial random number wavefunctions ecut=kinetic energy cutoff in Ha gmet(3,3)=reciprocal space metric (bohr^-2) icgmod=shift to be given to the location of data in cg kg(3,npw)=reduced coordinates of G vectors in basis sphere kpoint(3)=reduced coordinates of k point mcg=maximum second dimension of cg (at least npw*nband*nspinor) nband=number of bands being considered npw=number of planewaves in basis sphere nspinor=number of spinorial components of the wavefunctions
OUTPUT
cg(2,mcg)=revised values (not orthonormalized)
SOURCE
3159 subroutine cg_envlop(cg, ecut, gmet, icgmod, kg, kpoint, mcg, nband, npw, nspinor) 3160 3161 !Arguments ------------------------------------ 3162 !scalars 3163 integer,intent(in) :: icgmod,mcg,nband,npw,nspinor 3164 real(dp),intent(in) :: ecut 3165 !arrays 3166 integer,intent(in) :: kg(3,npw) 3167 real(dp),intent(in) :: gmet(3,3),kpoint(3) 3168 real(dp),intent(inout) :: cg(2,mcg) 3169 3170 !Local variables------------------------------- 3171 !scalars 3172 integer,parameter :: re=1,im=2,power=12 3173 integer :: i1,i2,i3,ig,igs,ispinor,nn,spad 3174 real(dp) :: cutoff,gs,kpgsqc 3175 !character(len=500) :: msg 3176 !arrays 3177 real(dp),allocatable :: cut_pws(:) 3178 3179 ! ************************************************************************* 3180 3181 !$(k+G)^2$ cutoff from $(1/2)(2 Pi (k+G))^2 = ecut$ 3182 kpgsqc=ecut/(2.0_dp*pi**2) 3183 cutoff = kpgsqc 3184 3185 ABI_MALLOC(cut_pws,(npw)) 3186 3187 !Run through G vectors in basis 3188 !$OMP PARALLEL DO PRIVATE(gs) 3189 do ig=1,npw 3190 i1=kg(1,ig) ; i2=kg(2,ig) ; i3=kg(3,ig) 3191 !(k+G)^2 evaluated using metric and kpoint 3192 gs = gmet(1,1)*(kpoint(1)+dble(i1))**2+& 3193 & gmet(2,2)*(kpoint(2)+dble(i2))**2+& 3194 & gmet(3,3)*(kpoint(3)+dble(i3))**2+& 3195 & 2.0_dp*(gmet(2,1)*(kpoint(2)+dble(i2))*(kpoint(1)+dble(i1))+& 3196 & gmet(3,2)*(kpoint(3)+dble(i3))*(kpoint(2)+dble(i2))+& 3197 & gmet(1,3)*(kpoint(1)+dble(i1))*(kpoint(3)+dble(i3))) 3198 if (gs>cutoff) then 3199 cut_pws(ig) = zero 3200 else 3201 cut_pws(ig) = (1.0_dp-(gs/cutoff))**power 3202 end if 3203 end do 3204 3205 !Run through bands (real and imaginary components) 3206 !$OMP PARALLEL DO PRIVATE(spad,igs) 3207 do nn=1,nband 3208 spad = (nn-1)*npw*nspinor+icgmod 3209 do ispinor=1,nspinor 3210 do ig=1,npw 3211 igs=ig+(ispinor-1)*npw 3212 cg(1,igs+spad) = cg(1,igs+spad) * cut_pws(ig) 3213 cg(2,igs+spad) = cg(2,igs+spad) * cut_pws(ig) 3214 end do 3215 end do 3216 end do 3217 3218 ABI_FREE(cut_pws) 3219 3220 end subroutine cg_envlop
m_cgtools/cg_from_reim [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_from_reim
FUNCTION
INPUTS
SOURCE
375 subroutine cg_from_reim(npw, ndat, reim, factor, cg) 376 377 !Arguments ------------------------------------ 378 !scalars 379 integer,intent(in) :: npw,ndat 380 real(dp),intent(in) :: factor 381 !arrays 382 real(dp),intent(in) :: reim(npw*2, ndat) 383 real(dp),intent(out) :: cg(2*npw, ndat) 384 385 !Local variables------------------------------- 386 integer :: idat 387 388 ! ************************************************************************* 389 390 ! UnPack real and imaginary part and multiply by scale factor if /= one. 391 do idat=1,ndat 392 call dcopy(npw, reim(1, idat), 1, cg(1, idat), 2) 393 call dcopy(npw, reim(npw+1, idat), 1, cg(2, idat), 2) 394 if (factor /= one) call dscal(2*npw, factor, cg(1, idat), 1) 395 end do 396 397 end subroutine cg_from_reim
m_cgtools/cg_fromcplx [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_fromcplx
FUNCTION
Convert a complex array to a real array with (real,imag) part
INPUTS
n = Specifies the number of elements in icplx and ocg. icplx(n)=Input complex array.
OUTPUT
ocg(2*n)=Output array with real and imaginary part.
SOURCE
197 subroutine cg_fromcplx(n, icplx, ocg) 198 199 !Arguments ------------------------------------ 200 !scalars 201 integer,intent(in) :: n 202 !arrays 203 real(dp),intent(out) :: ocg(2*n) 204 complex(dpc),intent(in) :: icplx(n) 205 206 !Local variables ------------------------------ 207 !scalars 208 integer :: ii,idx 209 210 ! ************************************************************************* 211 212 !$OMP PARALLEL DO PRIVATE(idx) 213 do ii=1,n 214 idx = 2*ii-1 215 ocg(idx ) = DBLE (icplx(ii)) 216 ocg(idx+1) = AIMAG(icplx(ii)) 217 end do 218 219 end subroutine cg_fromcplx
m_cgtools/cg_get_eigens [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_get_eigens
FUNCTION
Helper functions to compute <i|H|i> / <i|S|i> for ndat states. Assume normalized input wavefunctions.
INPUTS
OUTPUT
SOURCE
5583 subroutine cg_get_eigens(usepaw, istwf_k, npwsp, ndat, cg, ghc, gsc, eig, me_g0, comm) 5584 5585 integer,intent(in) :: usepaw, istwf_k, npwsp, ndat, me_g0, comm 5586 real(dp),intent(in) :: ghc(2*npwsp, ndat), cg(2*npwsp, ndat), gsc(2*npwsp, ndat*usepaw) 5587 real(dp),intent(out) :: eig(ndat) 5588 5589 !Local variables------------------------------- 5590 integer,parameter :: option1 = 1 5591 integer :: idat, ierr 5592 real(dp) :: doti, dots_r(ndat) 5593 ! ************************************************************************* 5594 5595 ! <psi|H|psi> / <psi|S|psi> 5596 !$OMP PARALLEL DO 5597 do idat=1,ndat 5598 call dotprod_g(eig(idat), doti, istwf_k, npwsp, option1, ghc(:,idat), cg(:,idat), me_g0, xmpi_comm_self) 5599 if (usepaw == 1) then 5600 call dotprod_g(dots_r(idat), doti, istwf_k, npwsp, option1, gsc(:,idat), cg(:,idat), me_g0, xmpi_comm_self) 5601 end if 5602 end do 5603 5604 if (xmpi_comm_size(comm) > 1) then 5605 call xmpi_sum(eig, comm, ierr) 5606 if (usepaw == 1) call xmpi_sum(dots_r, comm, ierr) 5607 end if 5608 5609 if (usepaw == 1) eig(:) = eig(:) / dots_r(:) 5610 5611 end subroutine cg_get_eigens
m_cgtools/cg_get_residvecs [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_get_residvecs
FUNCTION
Compute redidual vectors (H - eS) |psi> for ndat states.
INPUTS
OUTPUT
SOURCE
5627 subroutine cg_get_residvecs(usepaw, npwsp, ndat, eig, cg, ghc, gsc, residvecs) 5628 5629 integer,intent(in) :: usepaw, npwsp, ndat 5630 real(dp),intent(in) :: eig(ndat) 5631 real(dp),intent(in) :: ghc(2*npwsp, ndat), cg(2*npwsp, ndat), gsc(2*npwsp, ndat*usepaw) 5632 real(dp),intent(out) :: residvecs(2*npwsp, ndat) 5633 5634 !Local variables------------------------------- 5635 integer :: idat 5636 ! ************************************************************************* 5637 5638 if (usepaw == 1) then 5639 ! (H - e) |psi> 5640 !$OMP PARALLEL DO IF (ndat > 1) 5641 do idat=1,ndat 5642 residvecs(:,idat) = ghc(:,idat) - eig(idat) * gsc(:,idat) 5643 end do 5644 else 5645 ! (H - eS) |psi> 5646 !$OMP PARALLEL DO IF (ndat > 1) 5647 do idat=1,ndat 5648 residvecs(:,idat) = ghc(:,idat) - eig(idat) * cg(:,idat) 5649 end do 5650 end if 5651 5652 end subroutine cg_get_residvecs
m_cgtools/cg_getspin [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_getspin
FUNCTION
Sandwich a single wave function on the Pauli matrices
INPUTS
npw_k = number of plane waves cgcband = coefficients of spinorial wave function
OUTPUT
spin = 3-vector of spin components for this state cgcmat = outer spin product of spinorial wf with itself
SOURCE
1737 subroutine cg_getspin(cgcband, npw_k, spin, cgcmat) 1738 1739 !Arguments ------------------------------------ 1740 !scalars 1741 integer, intent(in) :: npw_k 1742 real(dp), intent(in) :: cgcband(2,2*npw_k) 1743 complex(dpc), intent(out),optional :: cgcmat(2,2) 1744 real(dp), intent(out) :: spin(3) 1745 1746 !Local variables------------------------------- 1747 !scalars 1748 complex(dpc) :: cspin(0:3), cgcmat_(2,2) 1749 ! *********************************************************************** 1750 1751 ! cgcmat_ = cgcband * cgcband^T* i.e. 2x2 matrix of spin components (dpcomplex) 1752 cgcmat_ = czero 1753 call zgemm('n','c',2,2,npw_k,cone,cgcband,2,cgcband,2,czero,cgcmat_,2) 1754 1755 ! spin(*) = sum_{si sj pi} cgcband(si,pi)^* pauli_mat*(si,sj) cgcband(sj,pi) 1756 cspin(0) = cgcmat_(1,1)*pauli_mat(1,1,0) + cgcmat_(2,1)*pauli_mat(2,1,0) & 1757 & + cgcmat_(1,2)*pauli_mat(1,2,0) + cgcmat_(2,2)*pauli_mat(2,2,0) 1758 cspin(1) = cgcmat_(1,1)*pauli_mat(1,1,1) + cgcmat_(2,1)*pauli_mat(2,1,1) & 1759 & + cgcmat_(1,2)*pauli_mat(1,2,1) + cgcmat_(2,2)*pauli_mat(2,2,1) 1760 cspin(2) = cgcmat_(1,1)*pauli_mat(1,1,2) + cgcmat_(2,1)*pauli_mat(2,1,2) & 1761 & + cgcmat_(1,2)*pauli_mat(1,2,2) + cgcmat_(2,2)*pauli_mat(2,2,2) 1762 cspin(3) = cgcmat_(1,1)*pauli_mat(1,1,3) + cgcmat_(2,1)*pauli_mat(2,1,3) & 1763 & + cgcmat_(1,2)*pauli_mat(1,2,3) + cgcmat_(2,2)*pauli_mat(2,2,3) 1764 !write(std_out,*) 'cgmat: ', cgcmat_ 1765 !write(std_out,*) 'real(spin): ', real(cspin) 1766 !write(std_out,*) 'aimag(spin): ', aimag(cspin) 1767 1768 spin = real(cspin(1:3)) 1769 if (present(cgcmat)) cgcmat = cgcmat_ 1770 1771 end subroutine cg_getspin
m_cgtools/cg_gsph2box [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_gsph2box
FUNCTION
Array iarrsph is defined in sphere with npw_k points. Insert iarrsph inside box of nx*ny*nz points to define array oarrbox for fft box. rest of oarrbox is filled with 0 s.
INPUTS
iarrsph(2,npw_k*ndat)= contains values for npw_k G vectors in basis sphere ndat=number of FFT to perform. npw_k=number of G vectors in basis at this k point oarrbox(2,ldx*ldy*ldz*ndat) = fft box nx,ny,nz=physical dimension of the box (oarrbox) ldx,ldy,ldz=memory dimension of oarrbox kg_k(3,npw_k)=integer coordinates of G vectors in basis sphere istwf_k=option parameter that describes the storage of wfs
OUTPUT
oarrbox(ldx*ldy*ldz*ndat)
NOTES
If istwf_k differs from 1, then special storage modes must be taken into account, for symmetric wavefunctions coming from k=(0 0 0) or other special k points.
SOURCE
1804 subroutine cg_gsph2box(nx,ny,nz,ldx,ldy,ldz,ndat,npw_k,istwf_k,kg_k,iarrsph,oarrbox) 1805 1806 !Arguments ------------------------------------ 1807 !scalars 1808 integer,intent(in) :: istwf_k,nx,ny,nz,ldx,ldy,ldz,ndat,npw_k 1809 !arrays 1810 integer,intent(in) :: kg_k(3,npw_k) 1811 real(dp),intent(in) :: iarrsph(2,npw_k*ndat) 1812 real(dp),intent(out) :: oarrbox(2,ldx*ldy*ldz*ndat) 1813 1814 !Local variables------------------------------- 1815 !scalars 1816 integer,parameter :: me_g0=1 1817 integer :: ix,ixinv,iy,iyinv,iz,izinv,dat,ipw,npwmin,pad_box,pad_sph,ifft,ifft_inv,ldxyz 1818 character(len=500) :: msg 1819 !arrays 1820 integer,allocatable :: ixinver(:),iyinver(:),izinver(:) 1821 1822 ! ************************************************************************* 1823 1824 !In the case of special k-points, invariant under time-reversal, 1825 !but not Gamma, initialize the inverse coordinates 1826 !Remember indeed that 1827 !u_k(G) = u_{k+G0}(G-G0); u_{-k}(-G) = u_k(G)^* 1828 !and therefore: 1829 !u_{G0/2}(G) = u_{G0/2}(-G-G0)^*. 1830 if (istwf_k>=2) then 1831 ABI_MALLOC(ixinver,(nx)) 1832 ABI_MALLOC(iyinver,(ny)) 1833 ABI_MALLOC(izinver,(nz)) 1834 if ( ANY(istwf_k==(/2,4,6,8/)) ) then 1835 ixinver(1)=1 1836 do ix=2,nx 1837 ixinver(ix)=nx+2-ix 1838 end do 1839 else 1840 do ix=1,nx 1841 ixinver(ix)=nx+1-ix 1842 end do 1843 end if 1844 if (istwf_k>=2 .and. istwf_k<=5) then 1845 iyinver(1)=1 1846 do iy=2,ny 1847 iyinver(iy)=ny+2-iy 1848 end do 1849 else 1850 do iy=1,ny 1851 iyinver(iy)=ny+1-iy 1852 end do 1853 end if 1854 if ( ANY(istwf_k==(/2,3,6,7/)) ) then 1855 izinver(1)=1 1856 do iz=2,nz 1857 izinver(iz)=nz+2-iz 1858 end do 1859 else 1860 do iz=1,nz 1861 izinver(iz)=nz+1-iz 1862 end do 1863 end if 1864 end if 1865 1866 ldxyz = ldx*ldy*ldz 1867 1868 if (istwf_k==1) then 1869 1870 !$OMP PARALLEL DO PRIVATE(pad_sph,pad_box,ix,iy,iz,ifft) 1871 do dat=1,ndat 1872 pad_sph = (dat-1)*npw_k 1873 pad_box = (dat-1)*ldxyz 1874 oarrbox(:,1+pad_box:ldxyz+pad_box) = zero ! zero the sub-array 1875 do ipw=1,npw_k 1876 ix=kg_k(1,ipw); if (ix<0) ix=ix+nx; ix=ix+1 1877 iy=kg_k(2,ipw); if (iy<0) iy=iy+ny; iy=iy+1 1878 iz=kg_k(3,ipw); if (iz<0) iz=iz+nz; iz=iz+1 1879 ifft = ix + (iy-1)*ldx + (iz-1)*ldx*ldy 1880 #if (defined FC_NVHPC) || (defined __INTEL_COMPILER && defined HAVE_OPENMP) 1881 if (ifft<0) stop "prevent from miscompiling this section" 1882 #endif 1883 oarrbox(1,ifft+pad_box) = iarrsph(1,ipw+pad_sph) 1884 oarrbox(2,ifft+pad_box) = iarrsph(2,ipw+pad_sph) 1885 end do 1886 end do 1887 1888 else if (istwf_k>=2) then 1889 ! 1890 npwmin=1 1891 if(istwf_k==2 .and. me_g0==1) then ! If gamma point, then oarrbox must be completed 1892 do dat=1,ndat 1893 pad_sph = (dat-1)*npw_k 1894 pad_box = (dat-1)*ldxyz 1895 oarrbox(1,1+pad_box) = iarrsph(1,1+pad_sph) 1896 oarrbox(2,1+pad_box) = zero 1897 end do 1898 npwmin=2 1899 end if 1900 1901 !$OMP PARALLEL DO PRIVATE(pad_sph,pad_box,ix,iy,iz,ixinv,iyinv,izinv,ifft,ifft_inv) 1902 do dat=1,ndat 1903 pad_sph = (dat-1)*npw_k 1904 pad_box = (dat-1)*ldxyz 1905 oarrbox(:,npwmin+pad_box:ldxyz+pad_box) = zero 1906 do ipw=npwmin,npw_k 1907 ix=kg_k(1,ipw); if(ix<0)ix=ix+nx; ix=ix+1 1908 iy=kg_k(2,ipw); if(iy<0)iy=iy+ny; iy=iy+1 1909 iz=kg_k(3,ipw); if(iz<0)iz=iz+nz; iz=iz+1 1910 ifft = ix + (iy-1)*ldx + (iz-1)*ldx*ldy 1911 #if defined FC_NVHPC 1912 if (ifft<0) stop "prevent from miscompiling this section" 1913 #endif 1914 ! Construct the coordinates of -k-G 1915 ixinv=ixinver(ix); iyinv=iyinver(iy); izinv=izinver(iz) 1916 ifft_inv = ixinv + (iyinv-1)*ldx + (izinv-1)*ldx*ldy 1917 1918 oarrbox(:,ifft +pad_box) = iarrsph(:,ipw+pad_sph) 1919 oarrbox(1,ifft_inv+pad_box) = iarrsph(1,ipw+pad_sph) 1920 oarrbox(2,ifft_inv+pad_box) = -iarrsph(2,ipw+pad_sph) 1921 end do 1922 end do 1923 ! 1924 else 1925 write(msg,'(a,i0)')"Wrong istwfk ",istwf_k 1926 ABI_ERROR(msg) 1927 end if 1928 1929 if (istwf_k>=2) then 1930 ABI_FREE(ixinver) 1931 ABI_FREE(iyinver) 1932 ABI_FREE(izinver) 1933 end if 1934 1935 end subroutine cg_gsph2box
m_cgtools/cg_hprotate_and_get_diag [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_hprotate_and_get_diag
FUNCTION
Compute the diagonal elements of E^H VNLX E where VNLX is an Hermitean matrixin packed form and E is the matrix with eigenvectors as column vectors. Mainly used to rotate the matrix elements of an operator after the subspace diagonalization.
INPUTS
OUTPUT
SOURCE
5459 subroutine cg_hprotate_and_get_diag(nband_k, subvnlx, evec, enlx_k) 5460 5461 !Arguments ------------------------------------ 5462 !scalars 5463 integer,intent(in) :: nband_k 5464 !arrays 5465 real(dp),intent(in) :: subvnlx(nband_k*(nband_k+1)) 5466 real(dp),intent(in) :: evec(2*nband_k,nband_k) 5467 real(dp), intent(out) :: enlx_k(nband_k) 5468 5469 !Local variables ------------------------------ 5470 !scalars 5471 integer :: ii,jj,pidx,iband 5472 real(dp),allocatable :: mat1(:,:,:),matvnl(:,:,:) 5473 5474 ! ************************************************************************* 5475 5476 ABI_MALLOC(matvnl,(2,nband_k,nband_k)) 5477 ABI_MALLOC(mat1,(2,nband_k,nband_k)) 5478 5479 ! Construct upper triangle of matvnl from subvnlx using full storage mode. 5480 pidx=0 5481 do jj=1,nband_k 5482 do ii=1,jj 5483 pidx=pidx+1 5484 matvnl(1,ii,jj)=subvnlx(2*pidx-1) 5485 matvnl(2,ii,jj)=subvnlx(2*pidx ) 5486 end do 5487 end do 5488 5489 call zhemm('L','U',nband_k,nband_k,cone,matvnl,nband_k,evec,nband_k,czero,mat1,nband_k) 5490 5491 !$OMP PARALLEL DO 5492 do iband=1,nband_k 5493 enlx_k(iband) = cg_real_zdotc(nband_k,evec(:,iband),mat1(:,:,iband)) 5494 end do 5495 5496 ABI_FREE(matvnl) 5497 ABI_FREE(mat1) 5498 5499 end subroutine cg_hprotate_and_get_diag
m_cgtools/cg_hrotate_and_get_diag [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_hrotate_and_get_diag
FUNCTION
Compute the diagonal elements of E^H VNLX E where VNLX is an Hermitean matrix. Mainly used to rotate the matrix elements of an operator after the subspace diagonalization.
INPUTS
OUTPUT
SOURCE
5517 subroutine cg_hrotate_and_get_diag(istwf_k, nband_k, totvnlx, evec, enlx_k) 5518 5519 !Arguments ------------------------------------ 5520 !scalars 5521 integer,intent(in) :: istwf_k, nband_k 5522 !arrays 5523 real(dp),intent(in) :: totvnlx(2*nband_k,nband_k) 5524 real(dp),intent(in) :: evec(2*nband_k,nband_k) 5525 real(dp),intent(out) :: enlx_k(nband_k) 5526 5527 !Local variables ------------------------------ 5528 !scalars 5529 real(dp),external :: ddot 5530 integer :: jj,iband 5531 real(dp),allocatable :: mat_loc(:,:),mat1(:,:,:),matvnl(:,:,:), evec_loc(:,:) 5532 5533 ! ************************************************************************* 5534 5535 ABI_MALLOC(matvnl, (2,nband_k, nband_k)) 5536 ABI_MALLOC(mat1, (2, nband_k, nband_k)) 5537 mat1=zero 5538 5539 enlx_k(1:nband_k)=zero 5540 5541 if (istwf_k==1) then 5542 call zhemm('l','l',nband_k,nband_k,cone,totvnlx,nband_k,evec,nband_k,czero,mat1,nband_k) 5543 do iband=1,nband_k 5544 enlx_k(iband)= cg_real_zdotc(nband_k,evec(:,iband),mat1(:,:,iband)) 5545 end do 5546 5547 else if (istwf_k==2) then 5548 ABI_MALLOC(evec_loc,(nband_k,nband_k)) 5549 ABI_MALLOC(mat_loc,(nband_k,nband_k)) 5550 do iband=1,nband_k 5551 do jj=1,nband_k 5552 evec_loc(iband,jj)=evec(2*iband-1,jj) 5553 end do 5554 end do 5555 call dsymm('l','l',nband_k,nband_k,one,totvnlx,nband_k,evec_loc,nband_k,zero,mat_loc,nband_k) 5556 do iband=1,nband_k 5557 enlx_k(iband)=ddot(nband_k,evec_loc(:,iband),1,mat_loc(:,iband),1) 5558 end do 5559 ABI_FREE(evec_loc) 5560 ABI_FREE(mat_loc) 5561 end if 5562 5563 ABI_FREE(matvnl) 5564 ABI_FREE(mat1) 5565 5566 end subroutine cg_hrotate_and_get_diag
m_cgtools/cg_kfilter [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_kfilter
FUNCTION
INPUTS
nband=Number of vectors in icg1
SOURCE
235 pure subroutine cg_kfilter(npw_k, my_nspinor, nband_k, kinpw, cg) 236 237 !Arguments ------------------------------------ 238 !scalars 239 integer,intent(in) :: npw_k, my_nspinor, nband_k 240 !arrays 241 real(dp), intent(in) :: kinpw(npw_k) 242 real(dp),intent(inout) :: cg(2,npw_k*my_nspinor*nband_k) 243 244 !Local variables------------------------------- 245 integer :: ispinor, iband, igs, iwavef, ipw 246 247 ! ************************************************************************* 248 249 ! Filter the WFs when modified kinetic energy is too large (see routine mkkin.f) 250 ! !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(igs, iwavef) 251 do ispinor=1,my_nspinor 252 igs=(ispinor-1)*npw_k 253 do iband=1,nband_k 254 iwavef=(iband-1)*npw_k*my_nspinor 255 do ipw=1+igs,npw_k+igs 256 if (kinpw(ipw-igs)>huge(zero)*1.d-11)then 257 cg(1,ipw+iwavef)=zero 258 cg(2,ipw+iwavef)=zero 259 end if 260 end do 261 end do 262 end do 263 264 end subroutine cg_kfilter
m_cgtools/cg_norm2g [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_norm2g
FUNCTION
Compute <psi|psi> for ndat states distributed inside communicator comm.
INPUTS
OUTPUT
SOURCE
5668 subroutine cg_norm2g(istwf_k, npwsp, ndat, cg, norms, me_g0, comm) 5669 5670 integer,intent(in) :: istwf_k, npwsp, ndat, me_g0, comm 5671 real(dp),intent(in) :: cg(2*npwsp, ndat) 5672 real(dp),intent(out) :: norms(ndat) 5673 5674 !Local variables------------------------------- 5675 integer :: idat, ierr 5676 ! ************************************************************************* 5677 5678 !$OMP PARALLEL DO IF (ndat > 1) 5679 do idat=1,ndat 5680 call sqnorm_g(norms(idat), istwf_k, npwsp, cg(:,idat), me_g0, xmpi_comm_self) 5681 end do 5682 if (xmpi_comm_size(comm) > 1) call xmpi_sum(norms, comm, ierr) 5683 5684 end subroutine cg_norm2g
m_cgtools/cg_normev [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_normev
FUNCTION
Normalize a set of nband eigenvectors of complex length npw (real length 2*npw) and set phases to make cg(i,i) real and positive. Near convergence, cg(i,j) approaches delta(i,j).
INPUTS
cg(2*npw,nband)=unnormalized eigenvectors npw=dimension of cg as shown nband=number of eigenvectors and complex length thereof.
OUTPUT
cg(2*npw,nband)=nband normalized eigenvectors
SOURCE
3244 subroutine cg_normev(cg, npw, nband) 3245 3246 !Arguments ------------------------------------ 3247 !scalars 3248 integer,intent(in) :: npw,nband 3249 !arrays 3250 real(dp),intent(inout) :: cg(2*npw,nband) 3251 3252 !Local variables------------------------------- 3253 !scalars 3254 integer :: ii,jj 3255 real(dp) :: den,evim,evre,phim,phre,xnorm 3256 character(len=500) :: msg 3257 3258 ! ************************************************************************* 3259 3260 !Loop over vectors 3261 do ii=1,nband 3262 ! find norm 3263 xnorm=0.0d0 3264 do jj=1,2*npw 3265 xnorm=xnorm+cg(jj,ii)**2 3266 end do 3267 3268 if((xnorm-one)**2>tol6)then 3269 write(msg,'(6a,i6,a,es16.6,3a)' )ch10,& 3270 'normev: ',ch10,& 3271 'Starting xnorm should be close to one (tol is tol6).',ch10,& 3272 'However, for state number',ii,', xnorm=',xnorm,ch10,& 3273 'It might be that your LAPACK library has not been correctly installed.' 3274 ABI_BUG(msg) 3275 end if 3276 3277 xnorm=1.0d0/sqrt(xnorm) 3278 ! Set up phase 3279 phre=cg(2*ii-1,ii) 3280 phim=cg(2*ii,ii) 3281 if (phim/=0.0d0) then 3282 den=1.0d0/sqrt(phre**2+phim**2) 3283 phre=phre*xnorm*den 3284 phim=phim*xnorm*den 3285 else 3286 ! give xnorm the same sign as phre (negate if negative) 3287 phre=sign(xnorm,phre) 3288 phim=0.0d0 3289 end if 3290 ! normalize with phase change 3291 do jj=1,2*npw,2 3292 evre=cg(jj,ii) 3293 evim=cg(jj+1,ii) 3294 cg(jj,ii)=phre*evre+phim*evim 3295 cg(jj+1,ii)=phre*evim-phim*evre 3296 end do 3297 end do 3298 3299 end subroutine cg_normev
m_cgtools/cg_precon [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_precon
FUNCTION
precondition <G|(H-e)|C>
INPUTS
cg(2,npw)=<G|C>. eval=current band eigenvalue = <C|H|C>. istwf_k=option parameter that describes the storage of wfs kinpw(npw)=(modified) kinetic energy for each plane wave (Hartree) nspinor=number of spinorial components of the wavefunctions vect(2,npw)=<G|H|C>. npw=number of planewaves at this k point. optekin= 1 if the kinetic energy used in preconditionning is modified according to Kresse, Furthmuller, PRB 54, 11169 (1996) [[cite:Kresse1996]] 0 otherwise mg_g0=1 if the node treats G=0. comm=MPI communicator
OUTPUT
pcon(npw)=preconditioning matrix vect(2,npw*nspinor)=<G|(H-eval)|C_{n,k}>*(polynomial ratio)
SOURCE
3332 subroutine cg_precon(cg, eval, istwf_k, kinpw, npw, nspinor, me_g0, optekin, pcon, vect, comm) 3333 3334 !Arguments ------------------------------------ 3335 !scalars 3336 integer,intent(in) :: istwf_k,npw,nspinor,optekin,me_g0,comm 3337 real(dp),intent(in) :: eval 3338 !arrays 3339 real(dp),intent(in) :: cg(2,npw*nspinor),kinpw(npw) 3340 real(dp),intent(inout) :: vect(2,npw*nspinor) 3341 real(dp),intent(out) :: pcon(npw) 3342 3343 !Local variables------------------------------- 3344 !scalars 3345 integer :: ierr,ig,igs,ipw1,ispinor 3346 real(dp) :: ek0,ek0_inv,fac,poly,xx 3347 character(len=500) :: msg 3348 !arrays 3349 real(dp) :: tsec(2) 3350 3351 ! ************************************************************************* 3352 3353 !Compute mean kinetic energy of band 3354 if(istwf_k==1)then 3355 ek0=zero 3356 do ispinor=1,nspinor 3357 igs=(ispinor-1)*npw 3358 do ig=1+igs,npw+igs 3359 if(kinpw(ig-igs)<huge(zero)*1.d-11)then 3360 ek0=ek0+kinpw(ig-igs)*(cg(1,ig)**2+cg(2,ig)**2) 3361 end if 3362 end do 3363 end do 3364 3365 else if (istwf_k>=2)then 3366 if (istwf_k==2 .and. me_g0 == 1)then 3367 ek0=zero ; ipw1=2 3368 if(kinpw(1)<huge(zero)*1.d-11)ek0=0.5_dp*kinpw(1)*cg(1,1)**2 3369 else 3370 ek0=zero ; ipw1=1 3371 end if 3372 do ispinor=1,nspinor 3373 igs=(ispinor-1)*npw 3374 do ig=ipw1+igs,npw+igs 3375 if(kinpw(ig)<huge(zero)*1.d-11)then 3376 ek0=ek0+kinpw(ig)*(cg(1,ig)**2+cg(2,ig)**2) 3377 end if 3378 end do 3379 end do 3380 ek0=2.0_dp*ek0 3381 end if 3382 3383 call timab(48,1,tsec) 3384 call xmpi_sum(ek0,comm,ierr) 3385 call timab(48,2,tsec) 3386 3387 if(ek0<1.0d-10)then 3388 write(msg,'(3a)')'The mean kinetic energy of a wavefunction vanishes.',ch10,'It is reset to 0.1 Ha.' 3389 ABI_WARNING(msg) 3390 ek0=0.1_dp 3391 end if 3392 3393 if (optekin==1) then 3394 ek0_inv=2.0_dp/(3._dp*ek0) 3395 else 3396 ek0_inv=1.0_dp/ek0 3397 end if 3398 3399 !Carry out preconditioning 3400 do ispinor=1,nspinor 3401 igs=(ispinor-1)*npw 3402 !$OMP PARALLEL DO PRIVATE(fac,ig,poly,xx) SHARED(cg,ek0_inv,eval,kinpw,igs,npw,vect,pcon) 3403 do ig=1+igs,npw+igs 3404 if(kinpw(ig-igs)<huge(zero)*1.d-11)then 3405 xx=kinpw(ig-igs)*ek0_inv 3406 ! Teter polynomial ratio 3407 poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp)) 3408 fac=poly/(poly+16._dp*xx**4) 3409 if (optekin==1) fac=two*fac 3410 pcon(ig-igs)=fac 3411 vect(1,ig)=( vect(1,ig)-eval*cg(1,ig) )*fac 3412 vect(2,ig)=( vect(2,ig)-eval*cg(2,ig) )*fac 3413 else 3414 pcon(ig-igs)=zero 3415 vect(1,ig)=zero 3416 vect(2,ig)=zero 3417 end if 3418 end do 3419 end do 3420 3421 end subroutine cg_precon
m_cgtools/cg_precon_block [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_precon_block
FUNCTION
precondition $<G|(H-e_{n,k})|C_{n,k}>$ for a block of band (band-FFT parallelisation) in the case of real WFs (istwfk/=1)
INPUTS
blocksize= size of blocks of bands cg(vectsize,blocksize)=<G|C_{n,k}> for a block of bands. eval(blocksize,blocksize)=current block of bands eigenvalues=<C_{n,k}|H|C_{n,k}>. ghc(vectsize,blocksize)=<G|H|C_{n,k}> for a block of bands. iterationnumber=number of iterative minimizations in LOBPCG kinpw(npw)=(modified) kinetic energy for each plane wave (Hartree) nspinor=number of spinorial components of the wavefunctions (on current proc) $vect(vectsize,blocksize)=<G|H|C_{n,k}> for a block of bands$. npw=number of planewaves at this k point. optekin= 1 if the kinetic energy used in preconditionning is modified according to Kresse, Furthmuller, PRB 54, 11169 (1996) [[cite:Kresse1996]] 0 otherwise optpcon= 0 the TPA preconditionning matrix does not depend on band 1 the TPA preconditionning matrix (not modified) 2 the TPA preconditionning matrix is independant of iteration number vectsize= size of vectors mg_g0=1 if this node has Gamma, 0 otherwise.
OUTPUT
vect(2,npw)=<g|(h-eval)|c_{n,k}>*(polynomial ratio)
SIDE EFFECTS
pcon(npw,blocksize)=preconditionning matrix input if optpcon=0,2 and iterationnumber/=1 output if optpcon=0,2 and iterationnumber==1
SOURCE
3462 subroutine cg_precon_block(cg,eval,blocksize,iterationnumber,kinpw,& 3463 & npw,nspinor,me_g0,optekin,optpcon,pcon,ghc,vect,vectsize,comm) 3464 3465 !Arguments ------------------------------------ 3466 !scalars 3467 integer,intent(in) :: blocksize,iterationnumber,npw,nspinor,optekin 3468 integer,intent(in) :: optpcon,vectsize,me_g0,comm 3469 !arrays 3470 real(dp),intent(in) :: cg(vectsize,blocksize),eval(blocksize,blocksize) 3471 real(dp),intent(in) :: ghc(vectsize,blocksize),kinpw(npw) 3472 real(dp),intent(inout) :: pcon(npw,blocksize),vect(vectsize,blocksize) 3473 3474 !Local variables------------------------------- 3475 !scalars 3476 integer :: iblocksize,ierr,ig,igs,ipw1,ispinor 3477 real(dp) :: fac,poly,xx 3478 character(len=500) :: msg 3479 !arrays 3480 real(dp) :: tsec(2) 3481 real(dp),allocatable :: ek0(:),ek0_inv(:) 3482 3483 ! ************************************************************************* 3484 3485 call timab(536,1,tsec) 3486 3487 !In this case, the Teter, Allan and Payne preconditioner is approximated: 3488 !the factor xx=Ekin(G) and no more Ekin(G)/Ekin(iband) 3489 if (optpcon==0) then 3490 do ispinor=1,nspinor 3491 igs=(ispinor-1)*npw 3492 if (me_g0 == 1) then 3493 do ig=1+igs,1+igs !g=0 3494 if (iterationnumber==1) then 3495 if(kinpw(ig-igs)<huge(zero)*1.d-11)then 3496 xx=kinpw(ig-igs) 3497 ! teter polynomial ratio 3498 poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp)) 3499 fac=poly/(poly+16._dp*xx**4) 3500 if (optekin==1) fac=two*fac 3501 pcon(ig-igs,1)=fac 3502 do iblocksize=1,blocksize 3503 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3504 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1) 3505 end do 3506 else 3507 pcon(ig-igs,1)=zero 3508 vect(ig,:)=0.0_dp 3509 end if 3510 else 3511 do iblocksize=1,blocksize 3512 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3513 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1) 3514 end do 3515 end if 3516 end do 3517 do ig=2+igs,npw+igs 3518 if (iterationnumber==1) then 3519 if(kinpw(ig-igs)<huge(zero)*1.d-11)then 3520 xx=kinpw(ig-igs) 3521 ! teter polynomial ratio 3522 poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp)) 3523 fac=poly/(poly+16._dp*xx**4) 3524 if (optekin==1) fac=two*fac 3525 pcon(ig-igs,1)=fac 3526 do iblocksize=1,blocksize 3527 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3528 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1) 3529 vect(ig+npw-1,iblocksize)=(ghc(ig+npw-1,iblocksize)-& 3530 & eval(iblocksize,iblocksize)*cg(ig+npw-1,iblocksize))*pcon(ig-igs,1) 3531 end do 3532 else 3533 pcon(ig-igs,1)=zero 3534 vect(ig,:)=zero 3535 vect(ig+npw-1,:)=zero 3536 end if 3537 else 3538 do iblocksize=1,blocksize 3539 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3540 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1) 3541 vect(ig+npw-1,iblocksize)=(ghc(ig+npw-1,iblocksize)-& 3542 & eval(iblocksize,iblocksize)*cg(ig+npw-1,iblocksize))*pcon(ig-igs,1) 3543 end do 3544 end if 3545 end do 3546 else 3547 do ig=1+igs,npw+igs 3548 if (iterationnumber==1) then 3549 if(kinpw(ig-igs)<huge(zero)*1.d-11)then 3550 xx=kinpw(ig-igs) 3551 ! teter polynomial ratio 3552 poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp)) 3553 fac=poly/(poly+16._dp*xx**4) 3554 if (optekin==1) fac=two*fac 3555 pcon(ig-igs,1)=fac 3556 do iblocksize=1,blocksize 3557 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3558 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1) 3559 vect(ig+npw,iblocksize)=(ghc(ig+npw,iblocksize)-& 3560 & eval(iblocksize,iblocksize)*cg(ig+npw,iblocksize))*pcon(ig-igs,1) 3561 end do 3562 else 3563 pcon(ig-igs,:)=zero 3564 vect(ig,:)=zero 3565 vect(ig+npw,:)=zero 3566 end if 3567 else 3568 do iblocksize=1,blocksize 3569 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3570 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1) 3571 vect(ig+npw,iblocksize)=(ghc(ig+npw,iblocksize)-& 3572 & eval(iblocksize,iblocksize)*cg(ig+npw,iblocksize))*pcon(ig-igs,1) 3573 end do 3574 end if 3575 end do 3576 end if 3577 end do 3578 3579 else if (optpcon>0) then 3580 ! Compute mean kinetic energy of all bands 3581 ABI_MALLOC(ek0,(blocksize)) 3582 ABI_MALLOC(ek0_inv,(blocksize)) 3583 if (iterationnumber==1.or.optpcon==1) then 3584 do iblocksize=1,blocksize 3585 if (me_g0 == 1)then 3586 ek0(iblocksize)=0.0_dp ; ipw1=2 3587 if(kinpw(1)<huge(zero)*1.d-11)ek0(iblocksize)=0.5_dp*kinpw(1)*cg(1,iblocksize)**2 3588 do ig=ipw1,npw 3589 if(kinpw(ig)<huge(zero)*1.d-11)then 3590 ek0(iblocksize)=ek0(iblocksize)+& 3591 & kinpw(ig)*(cg(ig,iblocksize)**2+cg(ig+npw-1,iblocksize)**2) 3592 end if 3593 end do 3594 else 3595 ek0(iblocksize)=0.0_dp ; ipw1=1 3596 do ig=ipw1,npw 3597 if(kinpw(ig)<huge(zero)*1.d-11)then 3598 ek0(iblocksize)=ek0(iblocksize)+& 3599 & kinpw(ig)*(cg(ig,iblocksize)**2+cg(ig+npw,iblocksize)**2) 3600 end if 3601 end do 3602 end if 3603 end do 3604 3605 call xmpi_sum(ek0,comm,ierr) 3606 3607 do iblocksize=1,blocksize 3608 if(ek0(iblocksize)<1.0d-10)then 3609 write(msg, '(4a)' )ch10,& 3610 'cg_precon_block: the mean kinetic energy of a wavefunction vanishes.',ch10,& 3611 'it is reset to 0.1ha.' 3612 ABI_WARNING(msg) 3613 ek0(iblocksize)=0.1_dp 3614 end if 3615 end do 3616 if (optekin==1) then 3617 ek0_inv(:)=2.0_dp/(3._dp*ek0(:)) 3618 else 3619 ek0_inv(:)=1.0_dp/ek0(:) 3620 end if 3621 end if !iterationnumber==1.or.optpcon==1 3622 3623 ! Carry out preconditioning 3624 do iblocksize=1,blocksize 3625 do ispinor=1,nspinor 3626 igs=(ispinor-1)*npw 3627 if (me_g0 == 1) then 3628 do ig=1+igs,1+igs !g=0 3629 if (iterationnumber==1.or.optpcon==1) then 3630 if(kinpw(ig-igs)<huge(zero)*1.d-11)then 3631 xx=kinpw(ig-igs)*ek0_inv(iblocksize) 3632 ! teter polynomial ratio 3633 poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp)) 3634 fac=poly/(poly+16._dp*xx**4) 3635 if (optekin==1) fac=two*fac 3636 pcon(ig-igs,iblocksize)=fac 3637 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3638 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*fac 3639 else 3640 pcon(ig-igs,iblocksize)=zero 3641 vect(ig,iblocksize)=0.0_dp 3642 end if 3643 else 3644 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3645 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,iblocksize) 3646 end if 3647 end do 3648 do ig=2+igs,npw+igs 3649 if (iterationnumber==1.or.optpcon==1) then 3650 if(kinpw(ig-igs)<huge(zero)*1.d-11)then 3651 xx=kinpw(ig-igs)*ek0_inv(iblocksize) 3652 ! teter polynomial ratio 3653 poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp)) 3654 fac=poly/(poly+16._dp*xx**4) 3655 if (optekin==1) fac=two*fac 3656 pcon(ig-igs,iblocksize)=fac 3657 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3658 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*fac 3659 vect(ig+npw-1,iblocksize)=(ghc(ig+npw-1,iblocksize)-& 3660 & eval(iblocksize,iblocksize)*cg(ig+npw-1,iblocksize))*fac 3661 else 3662 pcon(ig-igs,iblocksize)=zero 3663 vect(ig,iblocksize)=zero 3664 vect(ig+npw-1,iblocksize)=zero 3665 end if 3666 else 3667 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3668 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,iblocksize) 3669 vect(ig+npw-1,iblocksize)=(ghc(ig+npw-1,iblocksize)-& 3670 & eval(iblocksize,iblocksize)*cg(ig+npw-1,iblocksize))*pcon(ig-igs,iblocksize) 3671 end if 3672 end do 3673 else 3674 do ig=1+igs,npw+igs 3675 if (iterationnumber==1.or.optpcon==1) then 3676 if(kinpw(ig-igs)<huge(zero)*1.d-11)then 3677 xx=kinpw(ig-igs)*ek0_inv(iblocksize) 3678 ! teter polynomial ratio 3679 poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp)) 3680 fac=poly/(poly+16._dp*xx**4) 3681 if (optekin==1) fac=two*fac 3682 pcon(ig-igs,iblocksize)=fac 3683 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3684 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*fac 3685 vect(ig+npw,iblocksize)=(ghc(ig+npw,iblocksize)-& 3686 & eval(iblocksize,iblocksize)*cg(ig+npw,iblocksize))*fac 3687 else 3688 pcon(ig-igs,iblocksize)=zero 3689 vect(ig,iblocksize)=zero 3690 vect(ig+npw,iblocksize)=zero 3691 end if 3692 else 3693 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3694 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,iblocksize) 3695 vect(ig+npw,iblocksize)=(ghc(ig+npw,iblocksize)-& 3696 & eval(iblocksize,iblocksize)*cg(ig+npw,iblocksize))*pcon(ig-igs,iblocksize) 3697 end if 3698 end do 3699 end if 3700 end do 3701 end do 3702 ABI_FREE(ek0) 3703 ABI_FREE(ek0_inv) 3704 end if !optpcon 3705 3706 call timab(536,2,tsec) 3707 3708 end subroutine cg_precon_block
m_cgtools/cg_precon_many [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_precon_many
FUNCTION
INPUTS
OUTPUT
SOURCE
5751 subroutine cg_precon_many(istwf_k, npw, nspinor, ndat, cg, optekin, kinpw, vect, me_g0, comm) 5752 5753 integer,intent(in) :: istwf_k, npw, nspinor, optekin, ndat, me_g0, comm 5754 real(dp),intent(in) :: cg(2*npw*nspinor,ndat), kinpw(npw) 5755 real(dp),intent(inout) :: vect(2*npw*nspinor,ndat) 5756 5757 !Local variables------------------------------- 5758 integer :: idat 5759 real(dp),allocatable :: pcon(:) 5760 ! ************************************************************************* 5761 5762 ! TODO: Optimized version for MPI with ndat > 1 5763 ABI_MALLOC(pcon, (npw)) 5764 do idat=1,ndat 5765 call cg_precon(cg(:,idat), zero, istwf_k, kinpw, npw, nspinor, me_g0, optekin, pcon, vect(:,idat), comm) 5766 end do 5767 ABI_FREE(pcon) 5768 5769 !call cg_kinene(istwf_k, npw, nspinor, ndat, cg, me_g0, comm) 5770 !call cg_zprecon_block(cg,eval,blocksize,iterationnumber,kinpw, npw,nspinor,optekin,optpcon,pcon,ghc,vect,vectsize,comm) 5771 5772 end subroutine cg_precon_many
m_cgtools/cg_real_zdotc [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_real_zdotc
FUNCTION
Perform a vector-vector operation defined as res = REAL (\Sigma (conjg(x)*y)) where x and y are n-element vectors.
INPUTS
n = Specifies the number of elements in vector x and y x,y = Input arrays.
OUTPUT
res=Real part of the scalar product.
SOURCE
580 function cg_real_zdotc(n,x,y) result(res) 581 582 !Arguments ------------------------------------ 583 !scalars 584 integer,intent(in) :: n 585 !arrays 586 real(dp),intent(in) :: x(2,n) 587 real(dp),intent(in) :: y(2,n) 588 real(dp) :: res 589 590 !Local variables------------------------------- 591 real(dp),external :: ddot 592 593 ! ************************************************************************* 594 595 res = ddot(2*n, x, 1, y, 1) 596 597 end function cg_real_zdotc
m_cgtools/cg_set_imag0_to_zero [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_set_imag0_to_zero
FUNCTION
Set the imaginary part at G=0 to zero if istwfk == 2 and this proc has the gamma point
INPUTS
npwsp=Size of each vector (usually npw*nspinor) istwfk=Storage mode for the wavefunctions. 1 for standard full mode me_g0=1 if this node has G=0.
SIDE EFFECTS
cg(2*npwsp*nband) input: Input set of vectors. output: Orthonormalized set.
SOURCE
5838 pure subroutine cg_set_imag0_to_zero(istwfk, me_g0, npwsp, nband, cg, max_absimag) 5839 5840 !Arguments ------------------------------------ 5841 !scalars 5842 integer,intent(in) :: istwfk, me_g0, npwsp, nband 5843 !arrays 5844 real(dp),intent(inout) :: cg(2,npwsp*nband) 5845 real(dp),intent(out) :: max_absimag 5846 5847 !Local variables ------------------------------ 5848 integer :: ib, ii 5849 5850 ! ************************************************************************* 5851 5852 max_absimag = zero 5853 if (istwfk == 2 .and. me_g0 == 1) then 5854 do ib=1,nband 5855 ii = 1 + (ib - 1) * npwsp 5856 max_absimag = max(max_absimag, abs(cg(2, ii))) 5857 cg(2, ii) = zero 5858 end do 5859 end if 5860 5861 end subroutine cg_set_imag0_to_zero
m_cgtools/cg_setaug_zero [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_setaug_zero
FUNCTION
Set to zero all elements of the array that are not in the FFT box.
INPUTS
nx,ny,nz=physical dimensions of the FFT box ldx,ldy,ldx=memory dimension of arr ndat=number of FFTs SIDE EFFECT arr(2,ldx,ldy,ldz*ndat)= all entries in the augmented region are set to zero
SOURCE
286 pure subroutine cg_setaug_zero(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,arr) 287 288 !Arguments ------------------------------------ 289 !scalars 290 integer,intent(in) :: cplex,nx,ny,nz,ldx,ldy,ldz,ndat 291 !arrays 292 real(dp),intent(inout) :: arr(cplex,ldx,ldy,ldz*ndat) 293 294 !Local variables------------------------------- 295 integer :: iy,iz,dat,padat 296 297 ! ************************************************************************* 298 299 if (nx /= ldx) then 300 do iz=1,ldz*ndat 301 do iy=1,ldy 302 arr(:,nx+1:ldx,iy,iz) = zero 303 end do 304 end do 305 end if 306 307 if (ny /= ldy) then 308 do iz=1,ldz*ndat 309 arr(:,:,ny+1:ldy,iz) = zero 310 end do 311 end if 312 313 if (nz /= ldz) then 314 do dat=1,ndat 315 padat = ldz*(dat-1) 316 do iz=nz+1,ldz 317 arr(:,:,:,iz+padat) = zero 318 end do 319 end do 320 end if 321 322 end subroutine cg_setaug_zero
m_cgtools/cg_to_reim [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_to_reim
FUNCTION
INPUTS
SOURCE
337 subroutine cg_to_reim(npw, ndat, cg, factor, reim) 338 339 !Arguments ------------------------------------ 340 !scalars 341 integer,intent(in) :: npw,ndat 342 real(dp),intent(in) :: factor 343 !arrays 344 real(dp),intent(in) :: cg(2*npw,ndat) 345 real(dp),intent(out) :: reim(npw*2,ndat) 346 347 !Local variables------------------------------- 348 integer :: idat 349 350 ! ************************************************************************* 351 352 ! Pack real and imaginary part of the wavefunctions. 353 ! and multiply by scale factor if factor /= one. 354 do idat=1,ndat 355 call dcopy(npw, cg(1, idat), 2, reim(1, idat), 1) 356 call dcopy(npw, cg(2, idat), 2, reim(npw+1, idat), 1) 357 if (factor /= one) call dscal(2*npw, factor, reim(1, idat), 1) 358 end do 359 360 end subroutine cg_to_reim
m_cgtools/cg_tocplx [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_tocplx
FUNCTION
Convert a real array with (real,imag) part to complex.
INPUTS
n = Specifies the number of elements in cg and ocplx cg(2*n)=Input array with real and imaginary part.
OUTPUT
ocplx(n)=Output complex array.
SOURCE
155 subroutine cg_tocplx(n, cg, ocplx) 156 157 !Arguments ------------------------------------ 158 !scalars 159 integer,intent(in) :: n 160 !arrays 161 real(dp),intent(in) :: cg(2*n) 162 complex(dpc),intent(out) :: ocplx(n) 163 164 !Local variables ------------------------------ 165 !scalars 166 integer :: ii,idx 167 168 ! ************************************************************************* 169 170 !$OMP PARALLEL DO PRIVATE(idx) 171 do ii=1,n 172 idx = 2*ii-1 173 ocplx(ii) = DCMPLX(cg(idx),cg(idx+1)) 174 end do 175 176 end subroutine cg_tocplx
m_cgtools/cg_vlocpsi [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_vlocpsi
FUNCTION
Apply the local part of the potentatil to the wavefunction in real space.
INPUTS
nx,ny,nz=physical dimension of the FFT box. ldx,ldy,ldz=leading dimensions of the arrays. ndat=number of wavefunctions. cplex= 1 if vloc is real, 2 for complex vloc(cplex*ldx,ldy,ldz)=Local potential on the FFT box.
SIDE EFFECTS
ur(2,ldx,ldy,ldz*ndat)= Input = wavefunctions in real space. Output= vloc |ur>
SOURCE
2145 subroutine cg_vlocpsi(nx,ny,nz,ldx,ldy,ldz,ndat,cplex,vloc,ur) 2146 2147 !Arguments ------------------------------------ 2148 !scalars 2149 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,cplex 2150 !arrays 2151 real(dp),intent(in) :: vloc(cplex*ldx,ldy,ldz) 2152 real(dp),intent(inout) :: ur(2,ldx,ldy,ldz*ndat) 2153 2154 !Local variables------------------------------- 2155 !scalars 2156 integer :: idat,ix,iy,iz,padat 2157 real(dp) :: fim,fre 2158 2159 ! ************************************************************************* 2160 2161 if (cplex==1) then 2162 ! 2163 if (ndat==1) then 2164 !$OMP PARALLEL DO 2165 do iz=1,nz 2166 do iy=1,ny 2167 do ix=1,nx 2168 ur(1,ix,iy,iz) = vloc(ix,iy,iz) * ur(1,ix,iy,iz) 2169 ur(2,ix,iy,iz) = vloc(ix,iy,iz) * ur(2,ix,iy,iz) 2170 end do 2171 end do 2172 end do 2173 ! 2174 else 2175 ! 2176 !$OMP PARALLEL DO PRIVATE(padat) 2177 do idat=1,ndat 2178 padat = ldz*(idat-1) 2179 do iz=1,nz 2180 do iy=1,ny 2181 do ix=1,nx 2182 ur(1,ix,iy,iz+padat) = vloc(ix,iy,iz) * ur(1,ix,iy,iz+padat) 2183 ur(2,ix,iy,iz+padat) = vloc(ix,iy,iz) * ur(2,ix,iy,iz+padat) 2184 end do 2185 end do 2186 end do 2187 end do 2188 ! 2189 end if 2190 ! 2191 else if (cplex==2)then 2192 ! 2193 if (ndat==1) then 2194 !$OMP PARALLEL DO PRIVATE(fre,fim) 2195 do iz=1,nz 2196 do iy=1,ny 2197 do ix=1,nx 2198 fre = ur(1,ix,iy,iz) 2199 fim = ur(2,ix,iy,iz) 2200 ur(1,ix,iy,iz) = vloc(2*ix-1,iy,iz)*fre - vloc(2*ix,iy,iz)*fim 2201 ur(2,ix,iy,iz) = vloc(2*ix-1,iy,iz)*fim + vloc(2*ix,iy,iz)*fre 2202 end do 2203 end do 2204 end do 2205 else 2206 !$OMP PARALLEL DO PRIVATE(padat,fre,fim) 2207 do idat=1,ndat 2208 padat = ldz*(idat-1) 2209 do iz=1,nz 2210 do iy=1,ny 2211 do ix=1,nx 2212 fre = ur(1,ix,iy,iz+padat) 2213 fim = ur(2,ix,iy,iz+padat) 2214 ur(1,ix,iy,iz+padat) = vloc(2*ix-1,iy,iz)*fre - vloc(2*ix,iy,iz)*fim 2215 ur(2,ix,iy,iz+padat) = vloc(2*ix-1,iy,iz)*fim + vloc(2*ix,iy,iz)*fre 2216 end do 2217 end do 2218 end do 2219 end do 2220 end if 2221 ! 2222 else 2223 ur = huge(one) 2224 !ABI_BUG("Wrong cplex") 2225 end if 2226 2227 end subroutine cg_vlocpsi
m_cgtools/cg_zaxpby [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_zaxpby
FUNCTION
Scales two vectors, adds them to one another and stores result in the vector. y := a*x + b*y
INPUTS
n = the number of elements in vectors x and y. a = Specifies the scalar a. x = Array. b = Specifies the scalar b. y = Array
OUTPUT
y Contains the updated vector y.
SOURCE
718 subroutine cg_zaxpby(n, a, x, b, y) 719 720 !Arguments ------------------------------------ 721 !scalars 722 integer,intent(in) :: n 723 real(dp),intent(in) :: a(2),b(2) 724 !arrays 725 real(dp),intent(in) :: x(2*n) 726 real(dp),intent(inout) :: y(2*n) 727 728 ! ************************************************************************* 729 730 #ifdef HAVE_LINALG_AXPBY 731 call zaxpby(n, a, x, 1, b, y, 1) 732 #else 733 call zscal(n, b, y, 1) 734 call zaxpy(n, a, x, 1, y,1) 735 #endif 736 737 end subroutine cg_zaxpby
m_cgtools/cg_zaxpy [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_zaxpy
FUNCTION
Computes y = alpha*x + y
INPUTS
n = Specifies the number of elements in vectors x and y. alpha = Specifies the scalar alpha. x = Array
SIDE EFFECTS
y = Array. In output, y contains the updated vector.
SOURCE
675 subroutine cg_zaxpy(n, alpha, x, y) 676 677 !Arguments ------------------------------------ 678 !scalars 679 integer,intent(in) :: n 680 real(dp),intent(in) :: alpha(2) 681 !arrays 682 real(dp),intent(in) :: x(2*n) 683 real(dp),intent(inout) :: y(2*n) 684 685 ! ************************************************************************* 686 687 if (alpha(2) == zero) then 688 call daxpy(2*n, alpha(1), x, 1, y, 1) 689 else 690 call zaxpy(n, alpha, x, 1, y, 1) 691 end if 692 693 end subroutine cg_zaxpy
m_cgtools/cg_zaxpy_many_areal [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_zaxpy_many_areal
FUNCTION
Computes y = alpha*x + y
INPUTS
n = Specifies the number of elements in vectors x and y. ndat alpha(ndat) = Specifies the scalar alpha. x = Array
SIDE EFFECTS
y = Array. In output, y contains the updated vector.
SOURCE
5795 subroutine cg_zaxpy_many_areal(npwsp, ndat, alphas, x, y) 5796 5797 !Arguments ------------------------------------ 5798 !scalars 5799 integer,intent(in) :: npwsp, ndat 5800 real(dp),intent(in) :: alphas(ndat) 5801 !arrays 5802 real(dp),intent(in) :: x(2*npwsp, ndat) 5803 real(dp),intent(inout) :: y(2*npwsp, ndat) 5804 5805 !Local variables------------------------------- 5806 integer :: idat 5807 ! ************************************************************************* 5808 5809 !$OMP PARALLEL DO IF (ndat > 1) 5810 do idat=1,ndat 5811 call daxpy(2*npwsp, alphas(idat), x(1,idat), 1, y(1,idat), 1) 5812 end do 5813 5814 end subroutine cg_zaxpy_many_areal
m_cgtools/cg_zcopy [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_zcopy
FUNCTION
Perform y = x, where x and y are vectors.
INPUTS
n = Specifies the number of elements in vectors x and y. x = Input Array
OUTPUT
y = In output, y contains a copy of the values of x.
SOURCE
418 subroutine cg_zcopy(n, x, y) 419 420 !Arguments ------------------------------------ 421 !scalars 422 integer,intent(in) :: n 423 !arrays 424 real(dp),intent(in) :: x(2*n) 425 real(dp),intent(out) :: y(2*n) 426 427 ! ************************************************************************* 428 429 call zcopy(n, x, 1, y, 1) 430 431 end subroutine cg_zcopy
m_cgtools/cg_zdotc [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_zdotc
FUNCTION
Perform a vector-vector operation defined as res = \Sigma (conjg(x)*y) where x and y are n-element vectors.
INPUTS
n = Specifies the number of elements in vector x and y x,y = Input arrays.
OUTPUT
res(2)=Real and Imaginary part of the scalar product.
SOURCE
524 function cg_zdotc(n, x, y) result(res) 525 526 !Arguments ------------------------------------ 527 !scalars 528 integer,intent(in) :: n 529 !arrays 530 real(dp),intent(in) :: x(2,n) 531 real(dp),intent(in) :: y(2,n) 532 real(dp) :: res(2) 533 534 !Local variables------------------------------- 535 #ifdef HAVE_LINALG_ZDOTC_BUG 536 integer :: ii 537 #else 538 complex(dpc) :: cres 539 complex(dpc),external :: zdotc 540 #endif 541 542 ! ************************************************************************* 543 544 #ifdef HAVE_LINALG_ZDOTC_BUG 545 ! Workaround for veclib on MacOSx 546 res = zero 547 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:res) 548 do ii=1,n 549 res(1) = res(1) + x(1,ii)*y(1,ii) + x(2,ii)*y(2,ii) 550 res(2) = res(2) + x(1,ii)*y(2,ii) - x(2,ii)*y(1,ii) 551 end do 552 553 #else 554 cres = zdotc(n, x, 1, y, 1) 555 res(1) = REAL(cres) 556 res(2) = AIMAG(cres) 557 #endif 558 559 end function cg_zdotc
m_cgtools/cg_zdotg_zip [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_zdotg_zip
FUNCTION
Compute <cg1|cg2> for ndat states
INPUTS
OUTPUT
SOURCE
5700 subroutine cg_zdotg_zip(istwf_k, npwsp, ndat, option, cg1, cg2, dots, me_g0, comm) 5701 5702 integer,intent(in) :: istwf_k, npwsp, ndat, option, me_g0, comm 5703 real(dp),intent(in) :: cg1(2*npwsp,ndat), cg2(2*npwsp,ndat) 5704 real(dp),intent(out) :: dots(2,ndat) 5705 5706 !Local variables------------------------------- 5707 integer :: idat, ierr 5708 real(dp) :: dotr, doti, re_dots(ndat) 5709 ! ************************************************************************* 5710 5711 !$OMP PARALLEL DO IF (ndat > 1) PRIVATE(dotr, doti) 5712 do idat=1,ndat 5713 call dotprod_g(dotr, doti, istwf_k, npwsp, option, cg1(:,idat), cg2(:,idat), me_g0, xmpi_comm_self) 5714 if (istwf_k == 2) then 5715 re_dots(idat) = dotr 5716 else 5717 dots(:, idat) = [dotr, doti] 5718 end if 5719 end do 5720 5721 if (xmpi_comm_size(comm) > 1) then 5722 if (istwf_k == 2) then 5723 call xmpi_sum(re_dots, comm, ierr) 5724 else 5725 call xmpi_sum(dots, comm, ierr) 5726 end if 5727 end if 5728 5729 if (istwf_k == 2) then 5730 do idat=1,ndat 5731 dots(1,idat) = re_dots(idat) 5732 dots(2,idat) = zero 5733 end do 5734 end if 5735 5736 end subroutine cg_zdotg_zip
m_cgtools/cg_zdotu [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_zdotu
FUNCTION
Perform a vector-vector operation defined as res = \Sigma (x*y) where x and y are n-element vectors. Note that x is unconjugated.
INPUTS
n = Specifies the number of elements in vector x and y x,y = Input arrays.
OUTPUT
res(2)=Real and Imaginary part of the scalar product.
SOURCE
619 function cg_zdotu(n, x, y) result(res) 620 621 !Arguments ------------------------------------ 622 !scalars 623 integer,intent(in) :: n 624 !arrays 625 real(dp),intent(in) :: x(2,n) 626 real(dp),intent(in) :: y(2,n) 627 real(dp) :: res(2) 628 629 !Local variables------------------------------- 630 #ifdef HAVE_LINALG_ZDOTU_BUG 631 integer :: ii 632 #else 633 complex(dpc) :: cres 634 complex(dpc),external :: zdotu 635 #endif 636 637 ! ************************************************************************* 638 639 #ifdef HAVE_LINALG_ZDOTU_BUG 640 ! Workaround for veclib on MacOSx 641 res = zero 642 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:res) 643 do ii=1,n 644 res(1) = res(1) + x(1,ii)*y(1,ii) - x(2,ii)*y(2,ii) 645 res(2) = res(2) + x(1,ii)*y(2,ii) + x(2,ii)*y(1,ii) 646 end do 647 #else 648 cres = zdotu(n, x, 1, y, 1) 649 res(1) = REAL(cres) 650 res(2) = AIMAG(cres) 651 #endif 652 653 end function cg_zdotu
m_cgtools/cg_zgemm [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_zgemm
FUNCTION
The cg_zgemm routines perform a matrix-matrix operation with general matrices. The operation is defined as C := alpha*op(A)*op(B) + beta*C, where: op(x) is one of op(x) = x, or op(x) = x', or op(x) = conjg(x'), alpha and beta are scalars, A, B and C are matrices: op(A) is an m-by-k matrix, op(B) is a k-by-n matrix, C is an m-by-n matrix.
INPUTS
OUTPUT
SOURCE
823 subroutine cg_zgemm(transa, transb, npwsp, ncola, ncolb, cg_a, cg_b, cg_c, alpha, beta) 824 825 !Arguments ------------------------------------ 826 !scalars 827 integer,intent(in) :: npwsp,ncola,ncolb 828 real(dp),optional,intent(in) :: alpha(2), beta(2) 829 character(len=1),intent(in) :: transa, transb 830 !arrays 831 real(dp),intent(in) :: cg_a(2,npwsp*ncola), cg_b(2,npwsp*ncolb) 832 real(dp),intent(inout) :: cg_c(2,*) 833 834 !Local variables------------------------------- 835 !scalars 836 integer :: mm,nn,kk,lda,ldb,ldc 837 !real(dp) :: my_alpha(2),my_beta(2) 838 complex(dpc) :: my_calpha, my_cbeta 839 840 ! ************************************************************************* 841 842 lda = npwsp 843 ldb = npwsp 844 845 mm = npwsp 846 nn = ncolb 847 kk = ncola 848 849 if (toupper(transa) /= 'N') then 850 mm = ncola 851 kk = npwsp 852 end if 853 if (toupper(transb) /= 'N') nn = npwsp 854 855 ldc = mm 856 857 !my_alpha = cg_cone; if (PRESENT(alpha)) my_alpha = alpha 858 !my_beta = cg_czero; if (PRESENT(beta)) my_beta = beta 859 !call ZGEMM(transa, transb, mm, nn, kk, my_alpha, cg_a, lda, cg_b, ldb, my_beta, cg_c, ldc) 860 !call ZGEMM3M(transa, transb, mm, nn, kk, my_alpha, cg_a, lda, cg_b, ldb, my_beta, cg_c, ldc) 861 862 my_calpha = cone; if (PRESENT(alpha)) my_calpha = DCMPLX(alpha(1), alpha(2)) 863 my_cbeta = czero; if (PRESENT(beta)) my_cbeta = DCMPLX(beta(1), beta(2)) 864 865 call abi_zgemm_2r(transa, transb, mm, nn, kk, my_calpha, cg_a, lda, cg_b, ldb, my_cbeta, cg_c, ldc) 866 867 end subroutine cg_zgemm
m_cgtools/cg_zgemv [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_zgemv
FUNCTION
The cg_zgemv routines perform a **complex** matrix-vector operation defined as: y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or y := alpha*conjg(A')*x + beta*y, where: alpha and beta are COMPLEX scalars, x and y are COMPLEX vectors, A is a m-by-n COMPLEX matrix. Default is: alpha = 1 and beta = 0.
INPUTS
OUTPUT
SOURCE
764 subroutine cg_zgemv(trans, nrows, ncols, cgmat, vec, matvec, alpha, beta) 765 766 !Arguments ------------------------------------ 767 !scalars 768 integer,intent(in) :: nrows, ncols 769 real(dp),optional,intent(in) :: alpha(2), beta(2) 770 character(len=1),intent(in) :: trans 771 !arrays 772 real(dp),intent(in) :: cgmat(2,nrows*ncols), vec(2,*) 773 real(dp),intent(inout) :: matvec(2,*) 774 775 !Local variables------------------------------- 776 !scalars 777 integer :: mm, nn, kk, lda, ldb, ldc 778 real(dp) :: my_alpha(2), my_beta(2) 779 780 ! ************************************************************************* 781 782 my_alpha = cg_cone; if (present(alpha)) my_alpha = alpha 783 my_beta = cg_czero; if (present(beta)) my_beta = beta 784 785 lda = nrows; mm = nrows; nn = 1; kk = ncols 786 if (toupper(trans) /= 'N') then 787 mm = ncols; kk = nrows 788 end if 789 ldb = kk; ldc = mm 790 791 call ZGEMM(trans, "N", mm, nn, kk, my_alpha, cgmat, lda, vec, ldb, my_beta, matvec, ldc) 792 ! ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) 793 794 !call ZGEMV(trans,mm,nn,my_alpha,cgmat,lda,vec,1,my_beta,matvec,1) 795 796 end subroutine cg_zgemv
m_cgtools/cg_zprecon_block [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_zprecon_block
FUNCTION
precondition $<G|(H-e_{n,k})|C_{n,k}>$ for a block of band (band-FFT parallelisation)
INPUTS
blocksize= size of blocks of bands $cg(vectsize,blocksize)=<G|C_{n,k}> for a block of bands$. $eval(blocksize,blocksize)=current block of bands eigenvalues=<C_{n,k}|H|C_{n,k}>$. $ghc(vectsize,blocksize)=<G|H|C_{n,k}> for a block of bands$. iterationnumber=number of iterative minimizations in LOBPCG kinpw(npw)=(modified) kinetic energy for each plane wave (Hartree) nspinor=number of spinorial components of the wavefunctions (on current proc) $vect(vectsize,blocksize)=<G|H|C_{n,k}> for a block of bands$. npw=number of planewaves at this k point. optekin= 1 if the kinetic energy used in preconditionning is modified according to Kresse, Furthmuller, PRB 54, 11169 (1996) [[cite:Kresse1996]] 0 otherwise optpcon= 0 the TPA preconditionning matrix does not depend on band 1 the TPA preconditionning matrix (not modified) 2 the TPA preconditionning matrix is independant of iteration number vectsize= size of vectors comm=MPI communicator.
OUTPUT
vect(2,npw)=<g|(h-eval)|c_{n,k}>*(polynomial ratio)
SIDE EFFECTS
pcon(npw,blocksize)=preconditionning matrix input if optpcon=0,2 and iterationnumber/=1 output if optpcon=0,2 and iterationnumber==1
SOURCE
3748 subroutine cg_zprecon_block(cg,eval,blocksize,iterationnumber,kinpw,& 3749 & npw,nspinor,optekin,optpcon,pcon,ghc,vect,vectsize,comm) 3750 3751 !Arguments ------------------------------------ 3752 !scalars 3753 integer,intent(in) :: blocksize,iterationnumber,npw,nspinor,optekin 3754 integer,intent(in) :: optpcon,vectsize,comm 3755 !arrays 3756 real(dp),intent(in) :: kinpw(npw) 3757 real(dp),intent(inout) :: pcon(npw,blocksize) 3758 complex(dpc),intent(in) :: cg(vectsize,blocksize),eval(blocksize,blocksize) 3759 complex(dpc),intent(in) :: ghc(vectsize,blocksize) 3760 complex(dpc),intent(inout) :: vect(vectsize,blocksize) 3761 3762 !Local variables------------------------------- 3763 !scalars 3764 integer :: iblocksize,ierr,ig,igs,ispinor 3765 real(dp) :: fac,poly,xx 3766 !character(len=500) :: msg 3767 !arrays 3768 real(dp) :: tsec(2) 3769 real(dp),allocatable :: ek0(:),ek0_inv(:) 3770 3771 ! ************************************************************************* 3772 3773 call timab(536,1,tsec) 3774 3775 !In this case, the Teter, Allan and Payne preconditioner is approximated: 3776 !the factor xx=Ekin(G) and no more Ekin(G)/Ekin(iband) 3777 if (optpcon==0) then 3778 do ispinor=1,nspinor 3779 igs=(ispinor-1)*npw 3780 do ig=1+igs,npw+igs 3781 if (iterationnumber==1) then 3782 if(kinpw(ig-igs)<huge(zero)*1.d-11)then 3783 xx=kinpw(ig-igs) 3784 ! teter polynomial ratio 3785 poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp)) 3786 fac=poly/(poly+16._dp*xx**4) 3787 if (optekin==1) fac=two*fac 3788 pcon(ig-igs,1)=fac 3789 do iblocksize=1,blocksize 3790 vect(ig,iblocksize)=(ghc(ig,iblocksize)-eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1) 3791 end do 3792 else 3793 pcon(ig-igs,1)=zero 3794 vect(ig,:)=dcmplx(0.0_dp,0.0_dp) 3795 end if 3796 else 3797 do iblocksize=1,blocksize 3798 vect(ig,iblocksize)=(ghc(ig,iblocksize)-eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1) 3799 end do 3800 end if 3801 end do 3802 end do 3803 3804 else if (optpcon>0) then 3805 ! Compute mean kinetic energy of all bands 3806 ABI_MALLOC(ek0,(blocksize)) 3807 ABI_MALLOC(ek0_inv,(blocksize)) 3808 if (iterationnumber==1.or.optpcon==1) then 3809 do iblocksize=1,blocksize 3810 ek0(iblocksize)=0.0_dp 3811 do ispinor=1,nspinor 3812 igs=(ispinor-1)*npw 3813 do ig=1+igs,npw+igs 3814 if(kinpw(ig-igs)<huge(zero)*1.d-11)then 3815 ek0(iblocksize)=ek0(iblocksize)+kinpw(ig-igs)*& 3816 & (real(cg(ig,iblocksize))**2+aimag(cg(ig,iblocksize))**2) 3817 end if 3818 end do 3819 end do 3820 end do 3821 3822 call xmpi_sum(ek0,comm,ierr) 3823 3824 do iblocksize=1,blocksize 3825 if(ek0(iblocksize)<1.0d-10)then 3826 ABI_WARNING('the mean kinetic energy of a wavefunction vanishes. it is reset to 0.1ha.') 3827 ek0(iblocksize)=0.1_dp 3828 end if 3829 end do 3830 if (optekin==1) then 3831 ek0_inv(:)=2.0_dp/(3._dp*ek0(:)) 3832 else 3833 ek0_inv(:)=1.0_dp/ek0(:) 3834 end if 3835 end if !iterationnumber==1.or.optpcon==1 3836 3837 ! Carry out preconditioning 3838 do iblocksize=1,blocksize 3839 do ispinor=1,nspinor 3840 igs=(ispinor-1)*npw 3841 do ig=1+igs,npw+igs 3842 if (iterationnumber==1.or.optpcon==1) then 3843 if(kinpw(ig-igs)<huge(zero)*1.d-11)then 3844 xx=kinpw(ig-igs)*ek0_inv(iblocksize) 3845 ! teter polynomial ratio 3846 poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp)) 3847 fac=poly/(poly+16._dp*xx**4) 3848 if (optekin==1) fac=two*fac 3849 pcon(ig-igs,iblocksize)=fac 3850 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3851 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,iblocksize) 3852 else 3853 pcon(ig-igs,iblocksize)=zero 3854 vect(ig,iblocksize)=dcmplx(0.0_dp,0.0_dp) 3855 end if 3856 else 3857 vect(ig,iblocksize)=(ghc(ig,iblocksize)-& 3858 & eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,iblocksize) 3859 end if 3860 end do 3861 end do 3862 end do 3863 ABI_FREE(ek0) 3864 ABI_FREE(ek0_inv) 3865 end if !optpcon 3866 3867 call timab(536,2,tsec) 3868 3869 end subroutine cg_zprecon_block
m_cgtools/cg_zscal [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cg_zscal
FUNCTION
Perform x = a*x
INPUTS
n = Specifies the number of elements in vector x. a(2)= The scalar a. If a(2) is zero, x = a*x is computed via zdscal
OUTPUT
x = Updated vector.
SOURCE
452 subroutine cg_zscal(n, a, x) 453 454 !Arguments ------------------------------------ 455 !scalars 456 integer,intent(in) :: n 457 real(dp),intent(in) :: a(2) 458 !arrays 459 real(dp),intent(inout) :: x(2*n) 460 461 ! ************************************************************************* 462 463 if (a(2) == zero) then 464 call dscal(2*n, a(1), x, 1) 465 else 466 call zscal(n, a, x, 1) 467 end if 468 469 end subroutine cg_zscal
m_cgtools/cgnc_cholesky [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cgnc_cholesky
FUNCTION
Cholesky orthonormalization of the vectors stored in cg (version optimized for NC wavefunctions).
INPUTS
npwsp=Size of each vector (usually npw*nspinor) nband=Number of band in cg istwfk=Storage mode for the wavefunctions. 1 for standard full mode me_g0=1 if this node has G=0. comm_pw=MPI communicator for the planewave group. Set to xmpi_comm_self for sequential mode.
SIDE EFFECTS
cg(2*npwsp*nband) input: Input set of vectors. output: Orthonormalized set.
OUTPUT
[umat]=Cholesky upper triangle matrix.
SOURCE
2256 subroutine cgnc_cholesky(npwsp, nband, cg, istwfk, me_g0, comm_pw, use_gemm, umat) 2257 2258 !Arguments ------------------------------------ 2259 !scalars 2260 integer,intent(in) :: npwsp, nband, istwfk, comm_pw, me_g0 2261 logical,optional,intent(in) :: use_gemm 2262 !arrays 2263 real(dp),intent(inout) :: cg(2*npwsp*nband) 2264 real(dp),optional,allocatable,intent(out) :: umat(:,:,:) 2265 2266 !Local variables ------------------------------ 2267 !scalars 2268 integer :: ierr,b1,b2 2269 #ifdef DEBUG_MODE 2270 integer :: ptr 2271 character(len=500) :: msg 2272 #endif 2273 !real(dp) :: max_absimag 2274 logical :: my_usegemm 2275 !arrays 2276 real(dp) :: rcg0(nband) 2277 real(dp),allocatable :: r_ovlp(:,:), c_ovlp(:,:,:) 2278 2279 ! ************************************************************************* 2280 2281 #ifdef DEBUG_MODE 2282 if (istwfk == 2 .and. me_g0 == 1) then 2283 ierr = 0 2284 do b1=1,nband 2285 ptr = 2 + 2*(b1-1)*npwsp 2286 if (abs(cg(ptr)) > zero) then 2287 ierr = ierr + 1 2288 write(msg,'(a,i0,es13.6)')" Input b1, Im u(g=0) should be zero ",b1,cg(ptr) 2289 call wrtout(std_out, msg) 2290 !cg(ptr) = zero 2291 end if 2292 end do 2293 ABI_CHECK(ierr == 0, "Non zero imag part") 2294 end if 2295 #endif 2296 2297 ! In matrix notation O = PSI^H PSI = U^H U where PSI is a (ng,nb) matrix with the input wavefunctions 2298 ! The new orthogonalized states PHI is given by: PHI = PSI U^{-1} 2299 2300 my_usegemm = .FALSE.; if (PRESENT(use_gemm)) my_usegemm = use_gemm 2301 2302 if (istwfk /= 1) then 2303 ! Version optimized for real wavefunctions. 2304 ABI_MALLOC(r_ovlp, (nband, nband)) 2305 2306 !call cg_set_imag0_to_zero(istwfk, me_g0, npwsp, nband, cg, max_absimag) 2307 2308 ! 1) Calculate O_ij = <phi_i|phi_j> (real symmetric matrix) 2309 if (my_usegemm) then 2310 call DGEMM("T", "N", nband, nband, 2*npwsp, one, cg, 2*npwsp, cg, 2*npwsp, zero, r_ovlp, nband) 2311 else 2312 call DSYRK("U", "T", nband, 2*npwsp, one, cg, 2*npwsp, zero, r_ovlp, nband) 2313 end if 2314 2315 r_ovlp = two * r_ovlp 2316 if (istwfk == 2 .and. me_g0 == 1) then 2317 ! Extract the real part at G=0 and subtract its contribution to the overlap. 2318 call dcopy(nband, cg, 2*npwsp, rcg0, 1) 2319 do b2=1,nband 2320 do b1=1,b2 2321 r_ovlp(b1, b2) = r_ovlp(b1, b2) - rcg0(b1) * rcg0(b2) 2322 end do 2323 end do 2324 end if 2325 2326 ! Sum the overlap if PW are distributed. 2327 if (comm_pw /= xmpi_comm_self) call xmpi_sum(r_ovlp, comm_pw, ierr) 2328 2329 ! 2) Cholesky factorization: O = U^H U with U upper triangle matrix. 2330 call DPOTRF('U', nband, r_ovlp, nband, ierr) 2331 ABI_CHECK(ierr == 0, sjoin('DPOTRF returned info:', itoa(ierr))) 2332 2333 ! 3) Solve X U = cg. On exit cg is orthonormalized. 2334 call DTRSM('R', 'U', 'N', 'N', 2*npwsp, nband, one, r_ovlp, nband, cg, 2*npwsp) 2335 2336 if (present(umat)) then 2337 ABI_REMALLOC(umat, (1, nband, nband)) 2338 umat(1,:,:) = r_ovlp 2339 end if 2340 2341 ABI_FREE(r_ovlp) 2342 2343 else 2344 ! Version for complex wavefunctions. 2345 ABI_MALLOC(c_ovlp, (2, nband, nband)) 2346 2347 ! 1) Calculate O_ij = <phi_i|phi_j> (complex Hermitean) 2348 if (my_usegemm) then 2349 call abi_zgemm_2r("C", "N", nband, nband, npwsp, cone, cg, npwsp, cg, npwsp, czero, c_ovlp, nband) 2350 else 2351 call ZHERK("U", "C", nband, npwsp, cone, cg, npwsp, czero, c_ovlp, nband) 2352 end if 2353 2354 ! Sum the overlap if PW are distributed. 2355 if (comm_pw /= xmpi_comm_self) call xmpi_sum(c_ovlp, comm_pw, ierr) 2356 2357 ! 2) Cholesky factorization: O = U^H U with U upper triangle matrix. 2358 call ZPOTRF('U', nband, c_ovlp, nband, ierr) 2359 ABI_CHECK(ierr == 0, sjoin('ZPOTRF returned info:', itoa(ierr))) 2360 2361 ! 3) Solve X U = cg. On exit cg is orthonormalized. 2362 call ZTRSM('R', 'U', 'N', 'N', npwsp, nband, cone, c_ovlp, nband, cg, npwsp) 2363 2364 if (present(umat)) then 2365 ABI_REMALLOC(umat, (2, nband, nband)) 2366 umat = c_ovlp 2367 end if 2368 2369 ABI_FREE(c_ovlp) 2370 end if 2371 2372 #ifdef DEBUG_MODE 2373 if (istwfk == 2) then 2374 ierr = 0 2375 do b1=1,nband 2376 ptr = 2 + 2*(b1-1)*npwsp 2377 if (ABS(cg(ptr)) > zero) then 2378 ierr = ierr + 1 2379 write(msg,'(a,i0,es13.6)')" Output b1, Im u(g=0) should be zero ",b1,cg(ptr) 2380 end if 2381 end do 2382 ABI_CHECK(ierr == 0, "Non zero imag part") 2383 end if 2384 #endif 2385 2386 end subroutine cgnc_cholesky
m_cgtools/cgnc_gramschmidt [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cgnc_grortho
FUNCTION
Gram-Schmidt orthonormalization of the vectors stored in cg
INPUTS
npwsp=Size of each vector (usually npw*nspinor) nband=Number of band in cg istwfk=Storage mode for the wavefunctions. 1 for standard full mode me_g0=1 if this node has G=0. comm_pw=MPI communicator for the planewave group. Set to xmpi_comm_self for sequential mode.
SIDE EFFECTS
cg(2*npwsp*nband) input: Input set of vectors. output: Orthonormalized set.
SOURCE
2704 subroutine cgnc_gramschmidt(npwsp, nband, cg, istwfk, me_g0, comm_pw) 2705 2706 !Arguments ------------------------------------ 2707 !scalars 2708 integer,intent(in) :: npwsp, nband, istwfk, comm_pw, me_g0 2709 !arrays 2710 real(dp),intent(inout) :: cg(2*npwsp*nband) 2711 2712 !Local variables ------------------------------ 2713 !scalars 2714 integer :: b1,nb2,opt 2715 logical :: normalize 2716 2717 ! ************************************************************************* 2718 2719 ! Normalize the first vector. 2720 call cgnc_normalize(npwsp,1,cg(1),istwfk,me_g0,comm_pw) 2721 if (nband == 1) RETURN 2722 2723 ! Orthogonaluze b1 wrt to the bands in [1,b1-1]. 2724 normalize = .TRUE. 2725 do b1=2,nband 2726 opt = 1 + 2*npwsp*(b1-1) 2727 nb2=b1-1 2728 call cgnc_gsortho(npwsp,nb2,cg(1),1,cg(opt),istwfk,normalize,me_g0,comm_pw) 2729 end do 2730 2731 end subroutine cgnc_gramschmidt
m_cgtools/cgnc_gsortho [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cgnc_gsortho
FUNCTION
INPUTS
npwsp=Size of each vector (usually npw*nspinor) nband1=Number of vectors in icg1 nband1=Number of vectors in cg2 comm_pw=MPI communicator.
SIDE EFFECTS
cg2(2*npwsp*nband2) icg1(2*npwsp*nband1) input: Input set of vectors. output: Orthonormalized set.
SOURCE
2622 subroutine cgnc_gsortho(npwsp, nband1, icg1, nband2, iocg2, istwfk, normalize, me_g0, comm_pw) 2623 2624 !Arguments ------------------------------------ 2625 !scalars 2626 integer,intent(in) :: npwsp,nband1,nband2,istwfk,me_g0 2627 integer,optional,intent(in) :: comm_pw 2628 logical,intent(in) :: normalize 2629 !arrays 2630 real(dp),intent(in) :: icg1(2*npwsp*nband1) 2631 real(dp),intent(inout) :: iocg2(2*npwsp*nband2) 2632 2633 !Local variables ------------------------------ 2634 !scalars 2635 integer :: ierr,b1,b2 2636 !arrays 2637 real(dp) :: r_icg1(nband1),r_iocg2(nband2) 2638 real(dp),allocatable :: proj(:,:,:) 2639 2640 ! ************************************************************************* 2641 2642 ABI_MALLOC(proj, (2, nband1, nband2)) 2643 !proj = zero 2644 2645 ! 1) Calculate <cg1|cg2> 2646 call cg_zgemm("C", "N", npwsp, nband1, nband2, icg1, iocg2, proj) 2647 2648 if (istwfk>1) then 2649 ! nspinor is always 1 in this case. 2650 ! Account for the missing G and set the imaginary part to zero since wavefunctions are real. 2651 proj(1,:,:) = two * proj(1,:,:) 2652 proj(2,:,:) = zero 2653 ! 2654 if (istwfk==2 .and. me_g0==1) then 2655 ! Extract the real part at G=0 and subtract its contribution. 2656 call dcopy(nband1,icg1, 2*npwsp,r_icg1, 1) 2657 call dcopy(nband2,iocg2,2*npwsp,r_iocg2,1) 2658 do b2=1,nband2 2659 do b1=1,nband1 2660 proj(1,b1,b2) = proj(1,b1,b2) - r_icg1(b1) * r_iocg2(b2) 2661 end do 2662 end do 2663 end if 2664 ! 2665 end if 2666 ! 2667 ! This is for the MPI version 2668 if (comm_pw /= xmpi_comm_self) call xmpi_sum(proj,comm_pw,ierr) 2669 2670 ! 2) cg2 = cg2 - <cg1|cg2> cg1 2671 call cg_zgemm("N","N",npwsp,nband1,nband2,icg1,proj,iocg2,alpha=-cg_cone,beta=cg_cone) 2672 2673 ABI_FREE(proj) 2674 2675 ! 3) Normalize iocg2 if required. 2676 if (normalize) call cgnc_normalize(npwsp,nband2,iocg2,istwfk,me_g0,comm_pw) 2677 2678 end subroutine cgnc_gsortho
m_cgtools/cgnc_normalize [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cgnc_normalize
FUNCTION
INPUTS
npwsp=Size of each vector (usually npw*nspinor) nband=Number of vectors in icg1
SIDE EFFECTS
SOURCE
2539 subroutine cgnc_normalize(npwsp, nband, cg, istwfk, me_g0, comm_pw) 2540 2541 !Arguments ------------------------------------ 2542 !scalars 2543 integer,intent(in) :: npwsp,nband,istwfk,me_g0,comm_pw 2544 !arrays 2545 real(dp),intent(inout) :: cg(2*npwsp*nband) 2546 2547 !Local variables ------------------------------ 2548 !scalars 2549 integer :: ptr,ierr,band 2550 !character(len=500) :: msg 2551 !arrays 2552 real(dp) :: norm(nband),alpha(2) 2553 2554 ! ************************************************************************* 2555 2556 !$OMP PARALLEL DO PRIVATE(ptr) IF (nband > 1) 2557 do band=1,nband 2558 ptr = 1 + 2*npwsp*(band-1) 2559 norm(band) = cg_dznrm2(npwsp, cg(ptr)) 2560 norm(band) = norm(band) ** 2 2561 !norm(band) = cg_real_zdotc(npwsp, cg(ptr), cg(ptr)) 2562 end do 2563 2564 if (istwfk > 1) then 2565 norm = two * norm 2566 if (istwfk == 2 .and. me_g0 == 1) then 2567 !$OMP PARALLEL DO PRIVATE(ptr) IF (nband >1) 2568 do band=1,nband 2569 ptr = 1 + 2*npwsp*(band-1) 2570 norm(band) = norm(band) - cg(ptr)**2 2571 end do 2572 end if 2573 end if 2574 2575 if (comm_pw /= xmpi_comm_self) call xmpi_sum(norm, comm_pw, ierr) 2576 2577 ierr = 0 2578 do band=1,nband 2579 if (norm(band) > zero) then 2580 norm(band) = SQRT(norm(band)) 2581 else 2582 ierr = ierr + 1 2583 end if 2584 end do 2585 2586 if (ierr /= 0) then 2587 ABI_ERROR(sjoin("Found ", itoa(ierr)," vectors with norm <= zero!")) 2588 end if 2589 2590 !$OMP PARALLEL DO PRIVATE(ptr,alpha) IF (nband > 1) 2591 do band=1,nband 2592 ptr = 1 + 2*npwsp*(band-1) 2593 alpha = [one / norm(band), zero] 2594 call cg_zscal(npwsp, alpha, cg(ptr)) 2595 end do 2596 2597 end subroutine cgnc_normalize
m_cgtools/cgpaw_cholesky [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cgpaw_cholesky
FUNCTION
Cholesky orthonormalization of the vectors stored in cg. (version for PAW wavefunctions).
INPUTS
npwsp=Size of each vector (usually npw*nspinor) nband=Number of band in cg and gsc istwfk=Storage mode for the wavefunctions. 1 for standard full mode me_g0=1 if this node has G=0. comm_pw=MPI communicator for the planewave group. Set to xmpi_comm_self for sequential mode.
SIDE EFFECTS
cg(2*npwsp*nband) input: Input set of vectors |C>, S|C> output: Orthonormalized set such as <C|S|C> = 1 gsc(2*npwsp*nband): destroyed in output.
OUTPUT
[umat]=Cholesky upper triangle matrix.
SOURCE
2416 subroutine cgpaw_cholesky(npwsp, nband, cg, gsc, istwfk, me_g0, comm_pw, umat) 2417 2418 !Arguments ------------------------------------ 2419 !scalars 2420 integer,intent(in) :: npwsp, nband, istwfk, me_g0, comm_pw 2421 !arrays 2422 real(dp),intent(inout) :: cg(2*npwsp*nband), gsc(2*npwsp*nband) 2423 real(dp),optional,allocatable,intent(out) :: umat(:,:,:) 2424 2425 !Local variables ------------------------------ 2426 !scalars 2427 integer :: ierr, b1, b2 2428 !real(dp) :: max_absimag 2429 !character(len=500) :: msg 2430 !arrays 2431 real(dp) :: rcg0(nband), rg0sc(nband) 2432 real(dp),allocatable :: r_ovlp(:,:), c_ovlp(:,:,:) 2433 2434 ! ************************************************************************* 2435 2436 if (istwfk /= 1) then 2437 ! Version optimized for real wavefunctions. 2438 ABI_MALLOC(r_ovlp, (nband, nband)) 2439 2440 !call cg_set_imag0_to_zero(istwfk, me_g0, npwsp, nband, cg, max_absimag) 2441 !call cg_set_imag0_to_zero(istwfk, me_g0, npwsp, nband, gsc, max_absimag) 2442 2443 #ifdef HAVE_LINALG_GEMMT 2444 ! Use zgemmt extension BLAS3 provided by e.g. MKL 2445 r_ovlp = zero 2446 call DGEMMT("U", "T", "N", nband, 2*npwsp, one, cg, 2*npwsp, gsc, 2*npwsp, zero, r_ovlp, nband) 2447 #else 2448 call DGEMM("T", "N", nband, nband, 2*npwsp, one, cg, 2*npwsp, gsc, 2*npwsp, zero, r_ovlp, nband) 2449 #endif 2450 r_ovlp = two * r_ovlp 2451 2452 if (istwfk == 2 .and. me_g0 == 1) then 2453 ! Extract the real part at G=0 and subtract its contribution to the overlap. 2454 call dcopy(nband, cg, 2*npwsp, rcg0, 1) 2455 call dcopy(nband, gsc, 2*npwsp, rg0sc, 1) 2456 do b2=1,nband 2457 do b1=1,b2 2458 r_ovlp(b1,b2) = r_ovlp(b1,b2) - rcg0(b1) * rg0sc(b2) 2459 end do 2460 end do 2461 end if 2462 2463 ! Sum the overlap if PW are distributed. 2464 if (comm_pw /= xmpi_comm_self) call xmpi_sum(r_ovlp, comm_pw, ierr) 2465 2466 ! 2) Cholesky factorization: O = U^H U with U upper triangle matrix. 2467 call DPOTRF('U', nband, r_ovlp, nband, ierr) 2468 ABI_CHECK(ierr == 0, sjoin('DPOTRF returned info:', itoa(ierr))) 2469 2470 ! 3) Solve X U = cg. 2471 call DTRSM('R', 'U', 'N', 'N', 2*npwsp, nband, one, r_ovlp, nband, cg, 2*npwsp) 2472 2473 ! 4) Solve Y U = gsc. On exit <cg|gsc> = 1 2474 call DTRSM('R', 'U', 'N', 'N', 2*npwsp, nband, one, r_ovlp, nband, gsc, 2*npwsp) 2475 2476 !call cg_set_imag0_to_zero(istwfk, me_g0, npwsp, nband, cg, max_absimag) 2477 !call cg_set_imag0_to_zero(istwfk, me_g0, npwsp, nband, gsc, max_absimag) 2478 2479 if (present(umat)) then 2480 ABI_REMALLOC(umat, (1, nband, nband)) 2481 umat(1,:,:) = r_ovlp 2482 end if 2483 2484 ABI_FREE(r_ovlp) 2485 2486 else 2487 ! 1) Calculate O_ij = <phi_i|S|phi_j> (complex Hermitean) 2488 ABI_MALLOC(c_ovlp, (2, nband, nband)) 2489 2490 #ifdef HAVE_LINALG_GEMMT 2491 c_ovlp = zero 2492 call ZGEMMT("U", "C", "N", nband, npwsp, cone, cg, npwsp, gsc, npwsp, czero, c_ovlp, nband) 2493 #else 2494 call abi_zgemm_2r("C", "N", nband, nband, npwsp, cone, cg, npwsp, gsc, npwsp, czero, c_ovlp, nband) 2495 #endif 2496 2497 ! Sum the overlap if PW are distributed. 2498 if (comm_pw /= xmpi_comm_self) call xmpi_sum(c_ovlp, comm_pw, ierr) 2499 ! 2500 ! 2) Cholesky factorization: O = U^H U with U upper triangle matrix. 2501 call ZPOTRF('U', nband, c_ovlp, nband, ierr) 2502 ABI_CHECK(ierr == 0, sjoin('ZPOTRF returned info:', itoa(ierr))) 2503 2504 ! 3) Solve X U = cg. 2505 call ZTRSM('R', 'U', 'N', 'N', npwsp, nband, cone, c_ovlp, nband, cg, npwsp) 2506 2507 ! 4) Solve Y U = gsc. On exit <cg|gsc> = 1 2508 call ZTRSM('R', 'U', 'N', 'N', npwsp, nband, cone, c_ovlp, nband, gsc, npwsp) 2509 2510 if (present(umat)) then 2511 ABI_REMALLOC(umat, (2, nband, nband)) 2512 umat = c_ovlp 2513 end if 2514 2515 ABI_FREE(c_ovlp) 2516 end if 2517 2518 !call cgpaw_normalize(npwsp, nband, cg, gsc, istwfk, me_g0, comm_pw) 2519 2520 end subroutine cgpaw_cholesky
m_cgtools/cgpaw_gramschmidt [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cgpaw_gramschmidt
FUNCTION
Gram-Schmidt orthonormalization of the vectors stored in cg
INPUTS
npwsp=Size of each vector (usually npw*nspinor) nband=Number of bands in cg istwfk=Storage mode for the wavefunctions. 1 for standard full mode me_g0=1 if this node has G=0. comm_pw=MPI communicator for the planewave group. Set to xmpi_comm_self for sequential mode.
SIDE EFFECTS
cg(2*npwsp*nband), gsc(2*npwsp*nband) input: Input set of vectors. output: Orthonormalized set.
SOURCE
2934 subroutine cgpaw_gramschmidt(npwsp, nband, cg, gsc, istwfk, me_g0, comm_pw) 2935 2936 !Arguments ------------------------------------ 2937 !scalars 2938 integer,intent(in) :: npwsp,nband,istwfk,comm_pw,me_g0 2939 !arrays 2940 real(dp),intent(inout) :: cg(2*npwsp*nband),gsc(2*npwsp*nband) 2941 2942 !Local variables ------------------------------ 2943 !scalars 2944 integer :: b1,nb2,opt 2945 logical :: normalize 2946 2947 ! ************************************************************************* 2948 2949 ! Normalize the first vector. 2950 call cgpaw_normalize(npwsp,1,cg(1),gsc(1),istwfk,me_g0,comm_pw) 2951 if (nband == 1) RETURN 2952 2953 ! Orthogonalize b1 wrt to the bands in [1,b1-1]. 2954 normalize = .TRUE. 2955 do b1=2,nband 2956 opt = 1 + 2*npwsp*(b1-1) 2957 nb2=b1-1 2958 call cgpaw_gsortho(npwsp,nb2,cg(1),gsc(1),1,cg(opt),gsc(opt),istwfk,normalize,me_g0,comm_pw) 2959 end do 2960 2961 end subroutine cgpaw_gramschmidt
m_cgtools/cgpaw_gsortho [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cgpaw_gsortho
FUNCTION
This routine uses the Gram-Schmidt method to orthogonalize a set of PAW wavefunctions. with respect to an input block of states.
INPUTS
npwsp=Size of each vector (usually npw*nspinor) nband1=Number of vectors in the input block icg1 icg1(2*npwsp*nband1)=Input block of vectors. igsc1(2*npwsp*nband1)= S|C> for C in icg1. nband2=Number of vectors to orthogonalize normalize=True if output wavefunction must be normalized. istwfk=Storage mode for the wavefunctions. 1 for standard full mode me_g0=1 if this node has G=0. comm_pw=MPI communicator for the planewave group. Set to xmpi_comm_self for sequential mode.
SIDE EFFECTS
iocg2(2*npwsp*nband2), iogsc2(2*npwsp*nband1) input: set of |C> and S|C> wher |C> is the set of states to orthogonalize output: Orthonormalized set.
SOURCE
2850 subroutine cgpaw_gsortho(npwsp, nband1, icg1, igsc1, nband2, iocg2, iogsc2, istwfk, normalize, me_g0, comm_pw) 2851 2852 !Arguments ------------------------------------ 2853 !scalars 2854 integer,intent(in) :: npwsp, nband1, nband2, istwfk, me_g0 2855 integer,optional,intent(in) :: comm_pw 2856 logical,intent(in) :: normalize 2857 !arrays 2858 real(dp),intent(in) :: icg1(2*npwsp*nband1),igsc1(2*npwsp*nband1) 2859 real(dp),intent(inout) :: iocg2(2*npwsp*nband2),iogsc2(2*npwsp*nband2) 2860 2861 !Local variables ------------------------------ 2862 !scalars 2863 integer :: ierr,b1,b2 2864 !arrays 2865 real(dp) :: r_icg1(nband1),r_iocg2(nband2) 2866 real(dp),allocatable :: proj(:,:,:) 2867 2868 ! ************************************************************************* 2869 2870 ABI_MALLOC(proj,(2,nband1,nband2)) 2871 2872 ! 1) Calculate <cg1|cg2> 2873 call cg_zgemm("C","N",npwsp,nband1,nband2,igsc1,iocg2,proj) 2874 2875 if (istwfk>1) then 2876 ! nspinor is always 1 in this case. 2877 ! Account for the missing G and set the imaginary part to zero since wavefunctions are real. 2878 proj(1,:,:) = two * proj(1,:,:) 2879 proj(2,:,:) = zero 2880 ! 2881 if (istwfk==2 .and. me_g0==1) then 2882 ! Extract the real part at G=0 and subtract its contribution. 2883 call dcopy(nband1,igsc1,2*npwsp,r_icg1, 1) 2884 call dcopy(nband2,iocg2,2*npwsp,r_iocg2,1) 2885 do b2=1,nband2 2886 do b1=1,nband1 2887 proj(1,b1,b2) = proj(1,b1,b2) - r_icg1(b1) * r_iocg2(b2) 2888 end do 2889 end do 2890 end if 2891 2892 end if 2893 2894 ! This is for the MPI version 2895 if (comm_pw /= xmpi_comm_self) call xmpi_sum(proj,comm_pw,ierr) 2896 2897 ! 2) 2898 ! cg2 = cg2 - <Scg1|cg2> cg1 2899 ! S cg2 = S cg2 - <Scg1|cg2> S cg1 2900 call cg_zgemm("N","N",npwsp,nband1,nband2,icg1,proj,iocg2,alpha=-cg_cone,beta=cg_cone) 2901 call cg_zgemm("N","N",npwsp,nband1,nband2,igsc1,proj,iogsc2,alpha=-cg_cone,beta=cg_cone) 2902 2903 ABI_FREE(proj) 2904 2905 ! 3) Normalize iocg2 and iogsc2 if required. 2906 if (normalize) call cgpaw_normalize(npwsp, nband2, iocg2, iogsc2, istwfk, me_g0, comm_pw) 2907 2908 end subroutine cgpaw_gsortho
m_cgtools/cgpaw_normalize [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
cgpaw_normalize
FUNCTION
Normalize a set of PAW pseudo wavefunctions.
INPUTS
npwsp=Size of each vector (usually npw*nspinor) nband=Number of band in cg and gsc istwfk=Storage mode for the wavefunctions. 1 for standard full mode me_g0=1 if this node has G=0. comm_pw=MPI communicator for the planewave group. Set to xmpi_comm_self for sequential mode.
SIDE EFFECTS
cg(2*npwsp*nband) input: Input set of vectors |C> output: Normalized set such as <C|S|C> = 1 gsc(2*npwsp*nband) input: Input set of vectors S|C> output: New S|C> compute with the new |C>
SOURCE
2760 subroutine cgpaw_normalize(npwsp, nband, cg, gsc, istwfk, me_g0, comm_pw) 2761 2762 !Arguments ------------------------------------ 2763 !scalars 2764 integer,intent(in) :: npwsp, nband, istwfk, me_g0, comm_pw 2765 !arrays 2766 real(dp),intent(inout) :: cg(2*npwsp*nband), gsc(2*npwsp*nband) 2767 2768 !Local variables ------------------------------ 2769 !scalars 2770 integer :: ptr,ierr,band 2771 character(len=500) :: msg 2772 !arrays 2773 real(dp) :: norm(nband),alpha(2) 2774 2775 ! ************************************************************************* 2776 2777 !$OMP PARALLEL DO PRIVATE(ptr) IF (nband > 1) 2778 do band=1,nband 2779 ptr = 1 + 2*npwsp*(band-1) 2780 norm(band) = cg_real_zdotc(npwsp, gsc(ptr), cg(ptr)) 2781 end do 2782 2783 if (istwfk>1) then 2784 norm = two * norm 2785 if (istwfk==2 .and. me_g0==1) then 2786 !$OMP PARALLEL DO PRIVATE(ptr) IF (nband > 1) 2787 do band=1,nband 2788 ptr = 1 + 2*npwsp*(band-1) 2789 norm(band) = norm(band) - gsc(ptr) * cg(ptr) 2790 end do 2791 end if 2792 end if 2793 2794 if (comm_pw /= xmpi_comm_self) call xmpi_sum(norm, comm_pw, ierr) 2795 2796 ierr = 0 2797 do band=1,nband 2798 if (norm(band) > zero) then 2799 norm(band) = SQRT(norm(band)) 2800 else 2801 ierr = ierr + 1 2802 end if 2803 end do 2804 2805 if (ierr/=0) then 2806 write(msg,'(a,i0,a)')" Found ",ierr," vectors with norm <= zero!" 2807 ABI_ERROR(msg) 2808 end if 2809 2810 ! Scale |C> and S|C>. 2811 !$OMP PARALLEL DO PRIVATE(ptr,alpha) IF (nband > 1) 2812 do band=1,nband 2813 ptr = 1 + 2*npwsp*(band-1) 2814 alpha = [one / norm(band), zero] 2815 call cg_zscal(npwsp, alpha, cg(ptr)) 2816 call cg_zscal(npwsp, alpha, gsc(ptr)) 2817 end do 2818 2819 end subroutine cgpaw_normalize
m_cgtools/dotprod_g [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
dotprod_g
FUNCTION
Compute scalar product <vec1|vect2> of complex vectors vect1 and vect2 (can be the same) Take into account the storage mode of the vectors (istwf_k) If option=1, compute only real part, if option=2 compute also imaginary part. If the number of calls to the dot product scales quadratically with the volume of the system, it is preferable not to call the present routine, but but to write a specially optimized routine, that will avoid many branches related to the existence of 'option' and 'istwf_k'.
INPUTS
istwf_k=option parameter that describes the storage of wfs vect1(2,npw)=first vector (one should take its complex conjugate) vect2(2,npw)=second vector npw= (effective) number of planewaves at this k point (including spinorial level) option= 1 if only real part to be computed, 2 if both real and imaginary. 3 if in case istwf_k==1 must compute real and imaginary parts, but if istwf_k >1 must compute only real part me_g0=1 if this processor treats G=0, 0 otherwise comm=MPI communicator used to reduce the results.
OUTPUT
$doti=\Im ( <vect1|vect2> )$ , output only if option=2 and eventually option=3. $dotr=\Re ( <vect1|vect2> )$
SOURCE
1027 subroutine dotprod_g(dotr, doti, istwf_k, npw, option, vect1, vect2, me_g0, comm) 1028 1029 !Arguments ------------------------------------ 1030 !scalars 1031 integer,intent(in) :: istwf_k,npw,option,me_g0,comm 1032 real(dp),intent(out) :: doti,dotr 1033 !arrays 1034 real(dp),intent(in) :: vect1(2,npw),vect2(2,npw) 1035 1036 !Local variables------------------------------- 1037 integer :: ierr 1038 real(dp) :: dotarr(2) 1039 1040 ! ************************************************************************* 1041 1042 ! Init results indipendently of option. 1043 dotr = zero 1044 doti = zero 1045 1046 if (istwf_k==1) then 1047 ! General k-point 1048 1049 if(option==1)then 1050 dotr = cg_real_zdotc(npw,vect1,vect2) 1051 else 1052 dotarr = cg_zdotc(npw,vect1,vect2) 1053 dotr = dotarr(1) 1054 doti = dotarr(2) 1055 end if 1056 1057 else if (istwf_k==2 .and. me_g0==1) then 1058 ! Gamma k-point and I have G=0 1059 dotr=half*vect1(1,1)*vect2(1,1) 1060 dotr = dotr + cg_real_zdotc(npw-1,vect1(1,2),vect2(1,2)) 1061 dotr = two*dotr 1062 if (option==2) doti=zero 1063 1064 else 1065 ! Other TR k-points 1066 dotr = cg_real_zdotc(npw,vect1,vect2) 1067 dotr=two*dotr 1068 if (option==2) doti=zero 1069 end if 1070 1071 !Reduction in case of parallelism 1072 if (xmpi_comm_size(comm) > 1) then 1073 if (option==1.or.istwf_k/=1) then 1074 call xmpi_sum(dotr,comm,ierr) 1075 else 1076 dotarr(1)=dotr ; dotarr(2)=doti 1077 call xmpi_sum(dotarr,comm,ierr) 1078 dotr=dotarr(1) ; doti=dotarr(2) 1079 end if 1080 end if 1081 1082 end subroutine dotprod_g
m_cgtools/dotprod_v [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
dotprod_v
FUNCTION
Compute dot product of two potentials (integral over FFT grid), to obtain a square residual-like quantity (so the sum of product of values is NOT divided by the number of FFT points, and NOT multiplied by the primitive cell volume). Take into account the spin components of the potentials (nspden), and sum over them.
INPUTS
cplex=if 1, real space functions on FFT grid are REAL, if 2, COMPLEX nfft= (effective) number of FFT grid points (for this processor) nspden=number of spin-density components opt_storage: 0, if potentials are stored as V^up-up, V^dn-dn, Re[V^up-dn], Im[V^up-dn] 1, if potentials are stored as V, B_x, B_y, Bz (B=magn. field) pot1(cplex*nfft,nspden)=first real space potential on FFT grid pot2(cplex*nfft,nspden)=second real space potential on FFT grid comm=MPI communicator in which results will be reduced.
OUTPUT
dotr= value of the dot product
SOURCE
1264 subroutine dotprod_v(cplex,dotr,nfft,nspden,opt_storage,pot1,pot2,comm) 1265 1266 !Arguments ------------------------------------ 1267 !scalars 1268 integer,intent(in) :: cplex,nfft,nspden,opt_storage,comm 1269 real(dp),intent(out) :: dotr 1270 !arrays 1271 real(dp),intent(in) :: pot1(cplex*nfft,nspden),pot2(cplex*nfft,nspden) 1272 1273 !Local variables------------------------------- 1274 !scalars 1275 integer :: ierr,ifft,ispden 1276 real(dp) :: ar 1277 !arrays 1278 1279 ! ************************************************************************* 1280 1281 !Real or complex inputs are coded 1282 1283 dotr=zero 1284 !$OMP PARALLEL DO COLLAPSE(2) REDUCTION(+:dotr) 1285 do ispden=1,min(nspden,2) 1286 do ifft=1,cplex*nfft 1287 dotr =dotr + pot1(ifft,ispden)*pot2(ifft,ispden) 1288 end do 1289 end do 1290 1291 if (nspden==4) then 1292 ar=zero 1293 !$OMP PARALLEL DO COLLAPSE(2) REDUCTION(+:ar) 1294 do ispden=3,4 1295 do ifft=1,cplex*nfft 1296 ar = ar + pot1(ifft,ispden)*pot2(ifft,ispden) 1297 end do 1298 end do 1299 1300 if (opt_storage==0) then 1301 if (cplex==1) then 1302 dotr = dotr+two*ar 1303 else 1304 dotr = dotr+ar 1305 end if 1306 else 1307 dotr = half*(dotr+ar) 1308 end if 1309 end if 1310 1311 !MPIWF reduction (addition) on dotr is needed here 1312 if (xmpi_comm_size(comm)>1) then 1313 call xmpi_sum(dotr,comm,ierr) 1314 end if 1315 1316 end subroutine dotprod_v
m_cgtools/dotprod_vn [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
dotprod_vn
FUNCTION
Compute dot product of potential and density (integral over FFT grid), to obtain an energy-like quantity (so the usual dotproduct is divided by the number of FFT points, and multiplied by the primitive cell volume). Take into account the spin components of the density and potentials (nspden), and sum correctly over them. Note that the storage of densities and potentials is different : for potential, one stores the matrix components, while for the density, one stores the trace, and then, either the up component (if nspden=2), or the magnetization vector (if nspden=4).
INPUTS
cplex=if 1, real space functions on FFT grid are REAL, if 2, COMPLEX dens(cplex*nfft,nspden)=real space density on FFT grid mpi_enreg=information about MPI parallelization nfft= (effective) number of FFT grid points (for this processor) nfftot= total number of FFT grid points nspden=number of spin-density components option= if 1, only the real part is computed if 2, both real and imaginary parts are computed (not yet coded) pot(cplex*nfft,nspden)=real space potential on FFT grid (will be complex conjugated if cplex=2 and option=2) ucvol=unit cell volume (Bohr**3)
OUTPUT
doti= imaginary part of the dot product, output only if option=2 (and cplex=2). dotr= real part
NOTES
Concerning storage when nspden=4: cplex=1: V is stored as : V^11, V^22, Re[V^12], Im[V^12] (complex, hermitian) N is stored as : n, m_x, m_y, m_z (real) cplex=2: V is stored as : V^11, V^22, V^12, i.V^21 (complex) N is stored as : n, m_x, m_y, mz (complex)
SOURCE
1361 subroutine dotprod_vn(cplex,dens,dotr,doti,nfft,nfftot,nspden,option,pot,ucvol, & 1362 mpi_comm_sphgrid) ! Optional 1363 1364 !Arguments ------------------------------------ 1365 !scalars 1366 integer,intent(in) :: cplex,nfft,nfftot,nspden,option 1367 integer,intent(in),optional :: mpi_comm_sphgrid 1368 real(dp),intent(in) :: ucvol 1369 real(dp),intent(out) :: doti,dotr 1370 !arrays 1371 real(dp),intent(in) :: dens(cplex*nfft,nspden),pot(cplex*nfft,nspden) 1372 1373 !Local variables------------------------------- 1374 !scalars 1375 integer :: ierr,ifft,jfft 1376 real(dp) :: dim11,dim12,dim21,dim22,dim_dn,dim_up,dre11,dre12,dre21,dre22 1377 real(dp) :: dre_dn,dre_up,factor,nproc_sphgrid,pim11,pim12,pim21,pim22,pim_dn,pim_up,pre11 1378 real(dp) :: pre12,pre21,pre22,pre_dn,pre_up 1379 real(dp) :: bx_re,bx_im,by_re,by_im,bz_re,bz_im,v0_re,v0_im 1380 !arrays 1381 real(dp) :: buffer2(2) 1382 1383 ! ************************************************************************* 1384 1385 !Real or complex inputs are coded 1386 DBG_CHECK(ANY(cplex==(/1,2/)),"Wrong cplex") 1387 DBG_CHECK(ANY(nspden==(/1,2,4/)),"Wrong nspden") 1388 1389 !Real or complex output are coded 1390 DBG_CHECK(ANY(option==(/1,2/)),"Wrong option") 1391 1392 dotr=zero; doti=zero 1393 1394 if(nspden==1)then 1395 1396 if(option==1 .or. cplex==1 )then 1397 !$OMP PARALLEL DO REDUCTION(+:dotr) 1398 do ifft=1,cplex*nfft 1399 dotr=dotr + pot(ifft,1)*dens(ifft,1) 1400 end do 1401 ! dotr = ddot(cplex*nfft,pot,1,dens,1) 1402 1403 else ! option==2 and cplex==2 : one builds the imaginary part, from complex den/pot 1404 1405 !$OMP PARALLEL DO PRIVATE(jfft) REDUCTION(+:dotr,doti) 1406 do ifft=1,nfft 1407 jfft=2*ifft 1408 dotr=dotr + pot(jfft-1,1)*dens(jfft-1,1) + pot(jfft,1)*dens(jfft ,1) 1409 doti=doti + pot(jfft-1,1)*dens(jfft ,1) - pot(jfft,1)*dens(jfft-1,1) 1410 end do 1411 1412 end if 1413 1414 else if(nspden==2)then 1415 1416 if(option==1 .or. cplex==1 )then 1417 !$OMP PARALLEL DO REDUCTION(+:dotr) 1418 do ifft=1,cplex*nfft 1419 dotr=dotr + pot(ifft,1)* dens(ifft,2) & ! This is the spin up contribution 1420 & + pot(ifft,2)*(dens(ifft,1)-dens(ifft,2)) ! This is the spin down contribution 1421 end do 1422 1423 else ! option==2 and cplex==2 : one builds the imaginary part, from complex den/pot 1424 1425 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(nfft,dens,pot) REDUCTION(+:dotr,doti) 1426 do ifft=1,nfft 1427 1428 jfft=2*ifft 1429 dre_up=dens(jfft-1,2) 1430 dim_up=dens(jfft ,2) 1431 dre_dn=dens(jfft-1,1)-dre_up 1432 dim_dn=dens(jfft ,1)-dim_up 1433 pre_up=pot(jfft-1,1) 1434 pim_up=pot(jfft ,1) 1435 pre_dn=pot(jfft-1,2) 1436 pim_dn=pot(jfft ,2) 1437 1438 dotr=dotr + pre_up * dre_up & 1439 & + pim_up * dim_up & 1440 & + pre_dn * dre_dn & 1441 & + pim_dn * dim_dn 1442 doti=doti + pre_up * dim_up & 1443 & - pim_up * dre_up & 1444 & + pre_dn * dim_dn & 1445 & - pim_dn * dre_dn 1446 1447 end do 1448 end if 1449 1450 else if(nspden==4)then 1451 ! \rho{\alpha,\beta} V^{\alpha,\beta} = 1452 ! rho*(V^{11}+V^{22})/2$ 1453 ! + m_x Re(V^{12})- m_y Im{V^{12}}+ m_z(V^{11}-V^{22})/2 1454 if (cplex==1) then 1455 !$OMP PARALLEL DO PRIVATE(ifft) SHARED(nfft,dens,pot) REDUCTION(+:dotr) 1456 do ifft=1,nfft 1457 dotr=dotr + & 1458 & (pot(ifft,1) + pot(ifft,2))*half*dens(ifft,1) & ! This is the density contrib 1459 & + pot(ifft,3) *dens(ifft,2) & ! This is the m_x contrib 1460 & - pot(ifft,4) *dens(ifft,3) & ! This is the m_y contrib 1461 & +(pot(ifft,1) - pot(ifft,2))*half*dens(ifft,4) ! This is the m_z contrib 1462 end do 1463 else ! cplex=2 1464 ! Note concerning storage when cplex=2: 1465 ! V is stored as : v^11, v^22, V^12, i.V^21 (each are complex) 1466 ! N is stored as : n, m_x, m_y, mZ (each are complex) 1467 if (option==1) then 1468 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(nfft,dens,pot) REDUCTION(+:dotr) 1469 do ifft=1,nfft 1470 jfft=2*ifft 1471 dre11=half*(dens(jfft-1,1)+dens(jfft-1,4)) 1472 dim11=half*(dens(jfft ,1)+dens(jfft-1,4)) 1473 dre22=half*(dens(jfft-1,1)-dens(jfft-1,4)) 1474 dim22=half*(dens(jfft ,1)-dens(jfft-1,4)) 1475 dre12=half*(dens(jfft-1,2)+dens(jfft ,3)) 1476 dim12=half*(dens(jfft ,2)-dens(jfft-1,3)) 1477 dre21=half*(dens(jfft-1,2)-dens(jfft ,3)) 1478 dim21=half*(dens(jfft ,2)+dens(jfft-1,3)) 1479 pre11= pot(jfft-1,1) 1480 pim11= pot(jfft ,1) 1481 pre22= pot(jfft-1,2) 1482 pim22= pot(jfft ,2) 1483 pre12= pot(jfft-1,3) 1484 pim12= pot(jfft ,3) 1485 pre21= pot(jfft ,4) 1486 pim21=-pot(jfft-1,4) 1487 1488 v0_re=half*(pre11+pre22) 1489 v0_im=half*(pim11+pim22) 1490 bx_re=half*(pre12+pre21) 1491 bx_im=half*(pim12+pim21) 1492 by_re=half*(-pim12+pim21) 1493 by_im=half*(pre12-pre21) 1494 bz_re=half*(pre11-pre22) 1495 bz_im=half*(pim11-pim22) 1496 dotr=dotr+v0_re * dens(jfft-1,1)& 1497 & + v0_im * dens(jfft ,1) & 1498 & + bx_re * dens(jfft-1,2) & 1499 & + bx_im * dens(jfft ,2) & 1500 & + by_re * dens(jfft-1,3) & 1501 & + by_im * dens(jfft ,3) & 1502 & + bz_re * dens(jfft-1,4) & 1503 & + bz_im * dens(jfft ,4) 1504 ! dotr=dotr + pre11 * dre11 & 1505 !& + pim11 * dim11 & 1506 !& + pre22 * dre22 & 1507 !& + pim22 * dim22 & 1508 !& + pre12 * dre12 & 1509 !& + pim12 * dim12 & 1510 !& + pre21 * dre21 & 1511 !& + pim21 * dim21 1512 end do 1513 else ! option=2 1514 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(nfft,dens,pot) REDUCTION(+:dotr,doti) 1515 do ifft=1,nfft 1516 jfft=2*ifft 1517 dre11=half*(dens(jfft-1,1)+dens(jfft-1,4)) 1518 dim11=half*(dens(jfft ,1)+dens(jfft-1,4)) 1519 dre22=half*(dens(jfft-1,1)-dens(jfft-1,4)) 1520 dim22=half*(dens(jfft ,1)-dens(jfft-1,4)) 1521 dre12=half*(dens(jfft-1,2)+dens(jfft ,3)) 1522 dim12=half*(dens(jfft ,2)-dens(jfft-1,3)) 1523 dre21=half*(dens(jfft-1,2)-dens(jfft ,3)) 1524 dim21=half*(dens(jfft ,2)+dens(jfft-1,3)) 1525 pre11= pot(jfft-1,1) 1526 pim11= pot(jfft ,1) 1527 pre22= pot(jfft-1,2) 1528 pim22= pot(jfft ,2) 1529 pre12= pot(jfft-1,3) 1530 pim12= pot(jfft ,3) 1531 pre21= pot(jfft ,4) 1532 pim21=-pot(jfft-1,4) 1533 dotr=dotr + pre11 * dre11 & 1534 & + pim11 * dim11 & 1535 & + pre22 * dre22 & 1536 & + pim22 * dim22 & 1537 & + pre12 * dre12 & 1538 & + pim12 * dim12 & 1539 & + pre21 * dre21 & 1540 & + pim21 * dim21 1541 doti=doti + pre11 * dim11 & 1542 & - pim11 * dre11 & 1543 & + pre22 * dim22 & 1544 & - pim22 * dre22 & 1545 & + pre12 * dim12 & 1546 & - pim12 * dre12 & 1547 & + pre21 * dim21 & 1548 & - pim21 * dre21 1549 end do 1550 end if ! option 1551 end if ! cplex 1552 end if ! nspden 1553 1554 factor=ucvol/dble(nfftot) 1555 dotr=factor*dotr 1556 doti=factor*doti 1557 1558 !MPIWF reduction (addition) on dotr, doti is needed here 1559 if(present(mpi_comm_sphgrid)) then 1560 nproc_sphgrid=xmpi_comm_size(mpi_comm_sphgrid) 1561 if(nproc_sphgrid>1) then 1562 buffer2(1)=dotr 1563 buffer2(2)=doti 1564 call xmpi_sum(buffer2,mpi_comm_sphgrid,ierr) 1565 dotr=buffer2(1) 1566 doti=buffer2(2) 1567 end if 1568 end if 1569 1570 end subroutine dotprod_vn
m_cgtools/fxphas_and_cmp [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
fxphas_and_com
FUNCTION
Fix phase and compare two set of wavefunctions
OUTPUT
SOURCE
4111 logical function fxphas_and_cmp(npw_k, nspinor, nband_k, istwfk, cg1, cg2, eig_k, msg, atol_rho, atol_dphi) result(ok) 4112 4113 !Arguments ------------------------------------ 4114 integer,intent(in) :: npw_k, nspinor, nband_k, istwfk 4115 real(dp),intent(inout) :: cg1(2, npw_k*nspinor, nband_k), cg2(2, npw_k*nspinor, nband_k) 4116 real(dp),intent(in) :: eig_k(nband_k) 4117 character(len=*),intent(out) :: msg 4118 real(dp),optional,intent(in) :: atol_rho, atol_dphi 4119 4120 !Local variables------------------------------- 4121 integer, parameter :: useoverlap0 = 0, mgsc = 0 4122 integer :: ipw, ipwsp, isp, mcg, band 4123 real(dp) :: phi1, rho1, phi2, rho2, max_rho_adiff, atol_rho__, phi_diff_ref, max_dphi_adiff, atol_dphi__, gsc(0,0) 4124 character(len=500) :: btype 4125 4126 ! *********************************************************************** 4127 4128 atol_rho__ = tol6; if (present(atol_rho)) atol_rho__ = atol_rho 4129 atol_dphi__ = tol3; if (present(atol_dphi)) atol_dphi__ = atol_dphi 4130 max_rho_adiff = zero; max_dphi_adiff = zero; phi_diff_ref = huge(one) 4131 4132 mcg = npw_k * nspinor * nband_k 4133 call fxphas_seq(cg1, gsc, 1, 1, istwfk, mcg, mgsc, nband_k, npw_k * nspinor, useoverlap0) 4134 call fxphas_seq(cg2, gsc, 1, 1, istwfk, mcg, mgsc, nband_k, npw_k * nspinor, useoverlap0) 4135 4136 do band=1,nband_k 4137 call band_type(band, btype) 4138 if (btype == "degenerate") cycle 4139 write(234, *)"band: ", band, "istwfk: ", istwfk, trim(btype) 4140 write(235, *)"band: ", band, "istwfk:", istwfk, trim(btype) 4141 write(234, *)"cg1:"; write(235, *)"cg2:" 4142 !write(234, *)"cg1 rho:"; write(235, *)"cg2 rho phi:" 4143 do isp=1,nspinor 4144 do ipw=1,npw_k 4145 ipwsp = ipw + (isp - 1) * npw_k 4146 if (npw_k > 15 .and. ipw > 15 .and. ipw < npw_k - 15) cycle 4147 !write(234, *)ipwsp, cg1(1, ipwsp, band); write(234, *)ipwsp, cg1(2, ipwsp, band) 4148 !write(235, *)ipwsp, cg2(1, ipwsp, band); write(235, *)ipwsp, cg2(2, ipwsp, band) 4149 call rhophi(cg1(:, ipwsp, band), phi1, rho1) 4150 call rhophi(cg2(:, ipwsp, band), phi2, rho2) 4151 write(234, *)ipwsp, rho1!; write(234, *)ipwsp, phi1 4152 write(235, *)ipwsp, rho2!; write(235, *)ipwsp, phi2 4153 end do 4154 end do 4155 end do 4156 4157 do band=1,nband_k 4158 do ipw=1,npw_k * nspinor 4159 call rhophi(cg1(:, ipw, band), phi1, rho1) 4160 call rhophi(cg2(:, ipw, band), phi2, rho2) 4161 max_rho_adiff = max(max_rho_adiff, abs(rho1 - rho2)) 4162 if (rho1 > atol_rho__ ** 2) then 4163 if (phi_diff_ref /= huge(one)) phi_diff_ref = phi1 - phi2 4164 max_dphi_adiff = max(max_dphi_adiff, abs(phi_diff_ref - (phi1 - phi2))) 4165 end if 4166 end do 4167 end do 4168 4169 write(msg, "(2(a,es12.4))")"max_rho_adiff: ", max_rho_adiff, ", max_dphi_adiff: ", max_dphi_adiff 4170 ok = (max_rho_adiff < atol_rho__ .and. max_dphi_adiff < atol_dphi__) 4171 4172 contains 4173 subroutine band_type(band, btype) 4174 integer,intent(in) :: band 4175 character(len=*),intent(out) :: btype 4176 real(dp) :: e0 4177 4178 e0 = eig_k(band) 4179 4180 if (band == 1) then 4181 btype = "last_state" 4182 if (nband_k > 1) then 4183 btype = "non-degenerate" 4184 if (abs(e0 - eig_k(band + 1)) < tol6) btype = "degenerate" 4185 end if 4186 4187 else if (band == nband_k) then 4188 btype = "last_state" 4189 if (band - 1 > 0) then 4190 if (abs(e0 - eig_k(band - 1)) < tol6) btype = "degenerate" 4191 end if 4192 4193 else 4194 btype = "non-degenerate" 4195 if (abs(e0 - eig_k(band - 1)) < tol6 .or. abs(e0 - eig_k(band + 1)) < tol6) btype = "degenerate" 4196 end if 4197 4198 end subroutine band_type 4199 4200 end function fxphas_and_cmp
m_cgtools/fxphas_seq [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
fxphas_seq
FUNCTION
Fix phase of all bands. Keep normalization but maximize real part (minimize imag part). Also fix the sign of real part by setting the first non-zero element to be positive. This version has been stripped of all the mpi_enreg junk by MJV Use cgtk_fixphase if you need a routine that works with mpi_enreg and paral_kgb
INPUTS
cg(2,mcg)= contains the wavefunction |c> coefficients. gsc(2,mgsc)= if useoverlap==1, contains the S|c> coefficients, where S is an overlap matrix. icg=shift to be applied on the location of data in the array cg igsc=shift to be applied on the location of data in the array gsc istwfk=input option parameter that describes the storage of wfs (set to 1 if usual complex vectors) mcg=size of second dimension of cg mgsc=size of second dimension of gsc nband_k=number of bands npw_k=number of planewaves useoverlap=describe the overlap of wavefunctions: 0: no overlap (S=Identity_matrix) 1: PAW wavefunctions
OUTPUT
cg(2,mcg)=same array with altered phase. gsc(2,mgsc)= same array with altered phase.
SOURCE
3906 subroutine fxphas_seq(cg, gsc, icg, igsc, istwfk, mcg, mgsc, nband_k, npw_k, useoverlap) 3907 3908 !Arguments ------------------------------------ 3909 !scalars 3910 integer,intent(in) :: icg,igsc,istwfk,mcg,mgsc,nband_k,npw_k,useoverlap 3911 !arrays 3912 real(dp),intent(inout) :: cg(2,mcg),gsc(2,mgsc*useoverlap) 3913 3914 !Local variables------------------------------- 3915 !scalars 3916 integer :: iband,ii,indx 3917 real(dp) :: cim,cre,gscim,gscre,quotient,root1,root2,saa,sab,sbb,theta 3918 real(dp) :: thppi,xx,yy 3919 character(len=500) :: msg 3920 !arrays 3921 real(dp),allocatable :: cimb(:),creb(:),saab(:),sabb(:),sbbb(:) !,sarr(:,:) 3922 3923 ! ************************************************************************* 3924 3925 !The general case, where a complex phase indeterminacy is present 3926 if(istwfk==1)then 3927 3928 ABI_MALLOC(cimb,(nband_k)) 3929 ABI_MALLOC(creb,(nband_k)) 3930 ABI_MALLOC(saab,(nband_k)) 3931 ABI_MALLOC(sabb,(nband_k)) 3932 ABI_MALLOC(sbbb,(nband_k)) 3933 cimb(:)=zero ; creb(:)=zero 3934 3935 ! Loop over bands 3936 ! TODO: MG store saa arrays in sarr(3,nband_k) to reduce false sharing. 3937 do iband=1,nband_k 3938 indx=icg+(iband-1)*npw_k 3939 3940 ! Compute several sums over Re, Im parts of c 3941 saa=0.0_dp ; sbb=0.0_dp ; sab=0.0_dp 3942 do ii=1+indx,npw_k+indx 3943 saa=saa+cg(1,ii)*cg(1,ii) 3944 sbb=sbb+cg(2,ii)*cg(2,ii) 3945 sab=sab+cg(1,ii)*cg(2,ii) 3946 end do 3947 saab(iband)=saa 3948 sbbb(iband)=sbb 3949 sabb(iband)=sab 3950 end do ! iband 3951 3952 3953 do iband=1,nband_k 3954 3955 indx=icg+(iband-1)*npw_k 3956 3957 saa=saab(iband) 3958 sbb=sbbb(iband) 3959 sab=sabb(iband) 3960 3961 ! Get phase angle theta 3962 if (sbb+saa>tol8)then 3963 if(abs(sbb-saa)>tol8*(sbb+saa) .or. 2*abs(sab)>tol8*(sbb+saa))then 3964 if (abs(sbb-saa)>tol8*abs(sab)) then 3965 quotient=sab/(sbb-saa) 3966 theta=0.5_dp*atan(2.0_dp*quotient) 3967 else 3968 ! Taylor expansion of the atan in terms of inverse of its argument. Correct up to 1/x2, included. 3969 theta=0.25_dp*(pi-(sbb-saa)/sab) 3970 end if 3971 ! Check roots to get theta for max Re part 3972 root1=cos(theta)**2*saa+sin(theta)**2*sbb-2.0_dp*cos(theta)*sin(theta)*sab 3973 thppi=theta+0.5_dp*pi 3974 root2=cos(thppi)**2*saa+sin(thppi)**2*sbb-2.0_dp*cos(thppi)*sin(thppi)*sab 3975 if (root2>root1) theta=thppi 3976 else 3977 ! The real part vector and the imaginary part vector are orthogonal, and of same norm. Strong indeterminacy. 3978 ! Will determine the first non-zero coefficient, and fix its phase 3979 do ii=1+indx,npw_k+indx 3980 cre=cg(1,ii) 3981 cim=cg(2,ii) 3982 if(cre**2+cim**2>tol8**2*(saa+sbb))then 3983 if(cre**2>tol8**2**cim**2)then 3984 theta=atan(cim/cre) 3985 else 3986 ! Taylor expansion of the atan in terms of inverse of its argument. Correct up to 1/x2, included. 3987 theta=pi/2-cre/cim 3988 end if 3989 exit 3990 end if 3991 end do 3992 end if 3993 else 3994 write(msg,'(a,i0,5a)')& 3995 'The eigenvector with band ',iband,' has zero norm.',ch10,& 3996 'This usually happens when the number of bands (nband) is comparable to the number of planewaves (mpw)',ch10,& 3997 'Action: Check the parameters of the calculation. If nband ~ mpw, then decrease nband or, alternatively, increase ecut' 3998 ABI_ERROR(msg) 3999 end if 4000 4001 xx=cos(theta) 4002 yy=sin(theta) 4003 4004 ! Here, set the first non-zero element to be positive 4005 do ii=1+indx,npw_k+indx 4006 cre=cg(1,ii) 4007 cim=cg(2,ii) 4008 cre=xx*cre-yy*cim 4009 if(abs(cre)>tol8)exit 4010 end do 4011 if(cre<zero)then 4012 xx=-xx ; yy=-yy 4013 end if 4014 4015 creb(iband)=xx 4016 cimb(iband)=yy 4017 4018 end do 4019 4020 do iband=1,nband_k 4021 4022 indx=icg+(iband-1)*npw_k 4023 4024 xx=creb(iband) 4025 yy=cimb(iband) 4026 do ii=1+indx,npw_k+indx 4027 cre=cg(1,ii) 4028 cim=cg(2,ii) 4029 cg(1,ii)=xx*cre-yy*cim 4030 cg(2,ii)=xx*cim+yy*cre 4031 end do 4032 4033 ! Alter phase of array S|cg> 4034 if (useoverlap==1) then 4035 indx=igsc+(iband-1)*npw_k 4036 do ii=1+indx,npw_k+indx 4037 gscre=gsc(1,ii) 4038 gscim=gsc(2,ii) 4039 gsc(1,ii)=xx*gscre-yy*gscim 4040 gsc(2,ii)=xx*gscim+yy*gscre 4041 end do 4042 end if 4043 4044 end do ! iband 4045 4046 ABI_FREE(cimb) 4047 ABI_FREE(creb) 4048 ABI_FREE(saab) 4049 ABI_FREE(sabb) 4050 ABI_FREE(sbbb) 4051 4052 else ! if istwfk/=1 4053 ! Storages that take into account the time-reversal symmetry: the freedom is only a sign freedom 4054 4055 ABI_MALLOC(creb,(nband_k)) 4056 creb(:)=zero 4057 ! Loop over bands 4058 do iband=1,nband_k 4059 4060 indx=icg+(iband-1)*npw_k 4061 4062 ! Here, set the first non-zero real element to be positive 4063 do ii=1+indx,npw_k+indx 4064 cre=cg(1,ii) 4065 if(abs(cre)>tol8)exit 4066 end do 4067 creb(iband)=cre 4068 4069 end do ! iband 4070 4071 do iband=1,nband_k 4072 4073 cre=creb(iband) 4074 if(cre<zero)then 4075 indx=icg+(iband-1)*npw_k 4076 do ii=1+indx,npw_k+indx 4077 cg(1,ii)=-cg(1,ii) 4078 cg(2,ii)=-cg(2,ii) 4079 end do 4080 if(useoverlap==1)then 4081 indx=igsc+(iband-1)*npw_k 4082 do ii=1+indx,npw_k+indx 4083 gsc(1,ii)=-gsc(1,ii) 4084 gsc(2,ii)=-gsc(2,ii) 4085 end do 4086 end if 4087 end if 4088 4089 end do ! iband 4090 4091 ABI_FREE(creb) 4092 4093 end if ! istwfk 4094 4095 end subroutine fxphas_seq
m_cgtools/matrixelmt_g [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
matrixelmt_g
FUNCTION
Compute a matrix element of two wavefunctions, in reciprocal space, for an operator that is diagonal in reciprocal space: <wf1|op|wf2> For the time being, only spin-independent operators are treated.
INPUTS
diag(npw)=diagonal operator (real, spin-independent !) istwf_k=storage mode of the vectors needimag=0 if the imaginary part is not needed ; 1 if the imaginary part is needed npw=number of planewaves of the first vector nspinor=number of spinor components vect1(2,npw*nspinor)=first vector vect2(2,npw*nspinor)=second vector comm_fft=MPI communicator for the FFT me_g0=1 if this processors treats the G=0 component.
OUTPUT
ai=imaginary part of the matrix element ar=real part of the matrix element
SOURCE
1113 subroutine matrixelmt_g(ai,ar,diag,istwf_k,needimag,npw,nspinor,vect1,vect2,me_g0,comm_fft) 1114 1115 !Arguments ------------------------------------ 1116 !scalars 1117 integer,intent(in) :: istwf_k,needimag,npw,nspinor,me_g0,comm_fft 1118 real(dp),intent(out) :: ai,ar 1119 !arrays 1120 real(dp),intent(in) :: diag(npw),vect1(2,npw*nspinor),vect2(2,npw*nspinor) 1121 1122 !Local variables------------------------------- 1123 !scalars 1124 integer :: i1,ierr,ipw 1125 character(len=500) :: msg 1126 !arrays 1127 real(dp) :: buffer2(2) 1128 !real(dp),allocatable :: re_prod(:), im_prod(:) 1129 1130 ! ************************************************************************* 1131 1132 if (nspinor==2 .and. istwf_k/=1) then 1133 write(msg,'(a,a,a,i6,a,i6)')& 1134 'When istwf_k/=1, nspinor must be 1,',ch10,& 1135 'however, nspinor=',nspinor,', and istwf_k=',istwf_k 1136 ABI_BUG(msg) 1137 end if 1138 1139 #if 0 1140 !TODO 1141 ABI_MALLOC(re_prod,(npw*nspinor)) 1142 do ipw=1,npw*nspinor 1143 re_prod(ipw) = vect1(1,ipw)*vect2(1,ipw) + vect1(2,ipw)*vect2(2,ipw) 1144 end do 1145 1146 if (needimag == 1) then 1147 ABI_MALLOC(im_prod,(npw*nspinor)) 1148 do ipw=1,npw*nspinor 1149 im_prod(ipw) = vect1(1,ipw)*vect2(2,ipw) - vect1(2,ipw)*vect2(1,ipw) 1150 end do 1151 end if 1152 #endif 1153 1154 ar=zero 1155 if(needimag==1)ai=zero 1156 1157 !Normal storage mode 1158 if(istwf_k==1)then 1159 1160 ! Need only real part 1161 if(needimag==0)then 1162 1163 do ipw=1,npw 1164 ar=ar+diag(ipw)*(vect1(1,ipw)*vect2(1,ipw)+vect1(2,ipw)*vect2(2,ipw)) 1165 end do 1166 1167 if(nspinor==2)then 1168 do ipw=1+npw,2*npw 1169 ar=ar+diag(ipw-npw)*(vect1(1,ipw)*vect2(1,ipw)+vect1(2,ipw)*vect2(2,ipw)) 1170 end do 1171 end if 1172 1173 else ! Need also the imaginary part 1174 1175 do ipw=1,npw 1176 ar=ar+diag(ipw)*(vect1(1,ipw)*vect2(1,ipw)+vect1(2,ipw)*vect2(2,ipw)) 1177 ai=ai+diag(ipw)*(vect1(1,ipw)*vect2(2,ipw)-vect1(2,ipw)*vect2(1,ipw)) 1178 end do 1179 1180 if(nspinor==2)then 1181 do ipw=1+npw,2*npw 1182 ar=ar+diag(ipw-npw)*(vect1(1,ipw)*vect2(1,ipw)+vect1(2,ipw)*vect2(2,ipw)) 1183 ai=ai+diag(ipw-npw)*(vect1(1,ipw)*vect2(2,ipw)-vect1(2,ipw)*vect2(1,ipw)) 1184 end do 1185 end if 1186 1187 end if ! needimag 1188 1189 else if(istwf_k>=2)then 1190 1191 ! XG030513 : MPIWF need to know which proc has G=0 1192 1193 i1=1 1194 if(istwf_k==2 .and. me_g0==1)then 1195 ar=half*diag(1)*vect1(1,1)*vect2(1,1) ; i1=2 1196 end if 1197 1198 ! Need only real part 1199 if(needimag==0)then 1200 1201 do ipw=i1,npw 1202 ar=ar+diag(ipw)*(vect1(1,ipw)*vect2(1,ipw)+vect1(2,ipw)*vect2(2,ipw)) 1203 end do 1204 ar=two*ar 1205 1206 else ! Need also the imaginary part 1207 1208 do ipw=i1,npw 1209 ar=ar+diag(ipw)*(vect1(1,ipw)*vect2(1,ipw)+vect1(2,ipw)*vect2(2,ipw)) 1210 ai=ai+diag(ipw)*(vect1(1,ipw)*vect2(2,ipw)-vect1(2,ipw)*vect2(1,ipw)) 1211 end do 1212 ar=two*ar ; ai=two*ai 1213 1214 end if 1215 1216 end if ! istwf_k 1217 1218 #if 0 1219 ABI_FREE(re_prod) 1220 if (needimag == 1) then 1221 ABI_FREE(im_prod) 1222 end if 1223 #endif 1224 1225 !MPIWF need to make reduction on ar and ai . 1226 if (xmpi_comm_size(comm_fft)>1) then 1227 buffer2(1)=ai 1228 buffer2(2)=ar 1229 call xmpi_sum(buffer2,comm_fft ,ierr) 1230 ai=buffer2(1) 1231 ar=buffer2(2) 1232 end if 1233 1234 end subroutine matrixelmt_g
m_cgtools/mean_fftr [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
mean_fftr
FUNCTION
Compute the mean of an arraysp(nfft,nspden), over the FFT grid, for each component nspden, and return it in meansp(nspden). Take into account the spread of the array due to parallelism: the actual number of fft points is nfftot, but the number of points on this proc is nfft only. So : for ispden from 1 to nspden meansp(ispden) = sum(ifft=1,nfftot) arraysp(ifft,ispden) / nfftot
INPUTS
arraysp(nfft,nspden)=the array whose average has to be computed nfft=number of FFT points stored by one proc nfftot=total number of FFT points nspden=number of spin-density components
OUTPUT
meansp(nspden)=mean value for each nspden component
SOURCE
1680 subroutine mean_fftr(arraysp,meansp,nfft,nfftot,nspden,mpi_comm_sphgrid) 1681 1682 !Arguments ------------------------------------ 1683 !scalars 1684 integer,intent(in) :: nfft,nfftot,nspden 1685 integer,intent(in),optional:: mpi_comm_sphgrid 1686 !arrays 1687 real(dp),intent(in) :: arraysp(nfft,nspden) 1688 real(dp),intent(out) :: meansp(nspden) 1689 1690 !Local variables------------------------------- 1691 !scalars 1692 integer :: ierr,ifft,ispden,nproc_sphgrid 1693 real(dp) :: invnfftot,tmean 1694 1695 ! ************************************************************************* 1696 1697 invnfftot=one/(dble(nfftot)) 1698 1699 do ispden=1,nspden 1700 tmean=zero 1701 !$OMP PARALLEL DO REDUCTION(+:tmean) 1702 do ifft=1,nfft 1703 tmean=tmean+arraysp(ifft,ispden) 1704 end do 1705 meansp(ispden)=tmean*invnfftot 1706 end do 1707 1708 !XG030514 : MPIWF The values of meansp(ispden) should 1709 !now be summed accross processors in the same WF group, and spread on all procs. 1710 if(present(mpi_comm_sphgrid)) then 1711 nproc_sphgrid=xmpi_comm_size(mpi_comm_sphgrid) 1712 if(nproc_sphgrid>1) then 1713 call xmpi_sum(meansp,nspden,mpi_comm_sphgrid,ierr) 1714 end if 1715 end if 1716 1717 end subroutine mean_fftr
m_cgtools/overlap_g [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
overlap_g
FUNCTION
Compute the scalar product between WF at two different k-points < u_{n,k1} | u_{n,k2}>
INPUTS
mpw = maximum dimensioned size of npw npw_k1 = number of plane waves at k1 npw_k2 = number of plane waves at k2 nspinor = 1 for scalar, 2 for spinor wavefunctions pwind_k = array required to compute the scalar product (see initberry.f) vect1 = wavefunction at k1: | u_{n,k1} > vect2 = wavefunction at k1: | u_{n,k2} >
OUTPUT
doti = imaginary part of the scalarproduct dotr = real part of the scalarproduct
NOTES
In case a G-vector of the basis sphere of plane waves at k1 does not belong to the basis sphere of plane waves at k2, pwind = 0. Therefore, the dimensions of vect1 & vect2 are (1:2,0:mpw) and the element (1:2,0) MUST be set to zero. The current implementation if not compatible with TR-symmetry (i.e. istwfk/=1) !
SOURCE
4234 subroutine overlap_g(doti,dotr,mpw,npw_k1,npw_k2,nspinor,pwind_k,vect1,vect2) 4235 4236 !Arguments ------------------------------------ 4237 !scalars 4238 integer,intent(in) :: mpw,npw_k1,npw_k2,nspinor 4239 real(dp),intent(out) :: doti,dotr 4240 !arrays 4241 integer,intent(in) :: pwind_k(mpw) 4242 real(dp),intent(in) :: vect1(1:2,0:mpw*nspinor),vect2(1:2,0:mpw*nspinor) 4243 4244 !Local variables------------------------------- 4245 !scalars 4246 integer :: ipw,ispinor,jpw,spnshft1,spnshft2 4247 4248 ! ************************************************************************* 4249 4250 !Check if vect1(:,0) = 0 and vect2(:,0) = 0 4251 if ((abs(vect1(1,0)) > tol12).or.(abs(vect1(2,0)) > tol12).or. & 4252 & (abs(vect2(1,0)) > tol12).or.(abs(vect2(2,0)) > tol12)) then 4253 ABI_BUG('vect1(:,0) and/or vect2(:,0) are not equal to zero') 4254 end if 4255 4256 !Compute the scalar product 4257 dotr = zero; doti = zero 4258 do ispinor = 1, nspinor 4259 spnshft1 = (ispinor-1)*npw_k1 4260 spnshft2 = (ispinor-1)*npw_k2 4261 !$OMP PARALLEL DO PRIVATE(jpw) REDUCTION(+:doti,dotr) 4262 do ipw = 1, npw_k1 4263 jpw = pwind_k(ipw) 4264 dotr = dotr + vect1(1,spnshft1+ipw)*vect2(1,spnshft2+jpw) + vect1(2,spnshft1+ipw)*vect2(2,spnshft2+jpw) 4265 doti = doti + vect1(1,spnshft1+ipw)*vect2(2,spnshft2+jpw) - vect1(2,spnshft1+ipw)*vect2(1,spnshft2+jpw) 4266 end do 4267 end do 4268 4269 end subroutine overlap_g
m_cgtools/projbd [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
projbd
FUNCTION
Project out vector "direc" onto the bands contained in "cg". if useoverlap==0 New direc=direc-$sum_{j/=i} { <cg_{j}|direc>.|cg_{j}> }$ if useoverlap==1 (use of overlap matrix S) New direc=direc-$sum_{j/=i} { <cg_{j}|S|direc>.|cg_{j}> }$ (index i can be set to -1 to sum over all bands)
INPUTS
cg(2,mcg)=wavefunction coefficients for ALL bands iband0=which particular band we are interested in ("i" in the above formula) Can be set to -1 to sum over all bands... icg=shift to be given to the location of the data in cg iscg=shift to be given to the location of the data in scg istwf_k=option parameter that describes the storage of wfs mcg=maximum size of second dimension of cg mscg=maximum size of second dimension of scg nband=number of bands npw=number of planewaves nspinor=number of spinorial components (on current proc) scg(2,mscg*useoverlap)=<G|S|band> for ALL bands, where S is an overlap matrix scprod_io=0 if scprod array has to be computed; 1 if it is input (already in memory) tim_projbd=timing code of the calling subroutine(can be set to 0 if not attributed) useoverlap=describe the overlap of wavefunctions: 0: no overlap (S=Identity_matrix) 1: wavefunctions are overlapping me_g0=1 if this processors treats G=0, 0 otherwise. comm=MPI communicator (used if G vectors are distributed.
SIDE EFFECTS
direc(2,npw)= input: vector to be orthogonalised with respect to cg (and S) output: vector that has been orthogonalized wrt cg (and S) scprod(2,nband)=scalar_product if useoverlap==0: scalar_product_i=$<cg_{j}|direc_{i}>$ if useoverlap==1: scalar_product_i=$<cg_{j}|S|direc_{i}>$ if scprod_io=0, scprod is output if scprod_io=1, scprod is input
NOTES
1) MPIWF Might have to be recoded for efficient paralellism 2) The new version employs BLAS2 routine so that the OMP parallelism is delegated to BLAS library. May use BLAS3 if multiple wavefunctions are optimized at the same time. 3) Note for PAW: ref.= PRB 73, 235101 (2006) [[cite:Audouze2006]], equations (71) and (72): in normal use, projbd applies P_c projector if cg and scg are inverted, projbd applies P_c+ projector 4) cg_zgemv wraps ZGEMM whose implementation is more efficient, especially in the threaded case.
SOURCE
3025 subroutine projbd(cg,direc,iband0,icg,iscg,istwf_k,mcg,mscg,nband,& 3026 npw,nspinor,scg,scprod,scprod_io,tim_projbd,useoverlap,me_g0,comm) 3027 3028 !Arguments ------------------------------------ 3029 !scalars 3030 integer,intent(in) :: iband0,icg,iscg,istwf_k,mcg,mscg,nband,npw,nspinor 3031 integer,intent(in) :: scprod_io,tim_projbd,useoverlap,me_g0,comm 3032 !arrays 3033 real(dp),intent(in) :: cg(2,mcg),scg(2,mscg*useoverlap) 3034 real(dp),intent(inout) :: direc(2,npw*nspinor) 3035 real(dp),intent(inout) :: scprod(2,nband) 3036 3037 !Local variables------------------------------- 3038 !scalars 3039 integer :: nbandm,npw_sp,ierr 3040 !arrays 3041 real(dp) :: tsec(2),bkp_scprod(2),bkp_dirg0(2) 3042 3043 ! ************************************************************************* 3044 3045 call timab(210+tim_projbd,1,tsec) 3046 3047 npw_sp=npw*nspinor 3048 3049 nbandm=nband 3050 3051 if (istwf_k==1) then 3052 3053 if (scprod_io==0) then 3054 if (useoverlap==1) then 3055 call cg_zgemv("C",npw_sp,nbandm,scg(1,iscg+1),direc,scprod) 3056 else 3057 call cg_zgemv("C",npw_sp,nbandm,cg(1,icg+1), direc,scprod) 3058 end if 3059 call xmpi_sum(scprod,comm,ierr) 3060 end if 3061 3062 if (iband0>0.and.iband0<=nbandm) then 3063 bkp_scprod = scprod(:,iband0) 3064 scprod(:,iband0) = zero 3065 end if 3066 3067 call cg_zgemv("N",npw_sp,nbandm,cg(1,icg+1),scprod,direc,alpha=-cg_cone,beta=cg_cone) 3068 3069 if (iband0>0.and.iband0<=nbandm) scprod(:,iband0) = bkp_scprod ! Restore previous value as scprod is output. 3070 3071 else if (istwf_k>=2) then 3072 ! 3073 ! u_{G0/2}(G) = u_{G0/2}(-G-G0)^*; k = G0/2 3074 ! hence: 3075 ! sum_G f*(G) g(G) = 2 REAL sum_G^{IZONE} w(G) f*(G)g(G) 3076 ! where 3077 ! w(G) = 1 except for k=0 and G=0 where w(G=0) = 1/2. 3078 ! 3079 if (scprod_io==0) then 3080 3081 if (useoverlap==1) then 3082 3083 if (istwf_k==2 .and. me_g0==1) then 3084 bkp_dirg0 = direc(:,1) 3085 direc(1,1) = half * direc(1,1) 3086 direc(2,1) = zero 3087 end if 3088 3089 call cg_zgemv("C",npw_sp,nbandm,scg(1,iscg+1),direc,scprod) 3090 scprod = two * scprod 3091 scprod(2,:) = zero 3092 3093 if(istwf_k==2 .and. me_g0==1) direc(:,1) = bkp_dirg0 3094 3095 else 3096 3097 if (istwf_k==2 .and. me_g0==1) then 3098 bkp_dirg0 = direc(:,1) 3099 direc(1,1) = half * direc(1,1) 3100 direc(2,1) = zero 3101 end if 3102 3103 call cg_zgemv("C",npw_sp,nbandm,cg(1,icg+1),direc,scprod) 3104 scprod = two * scprod 3105 scprod(2,:) = zero 3106 3107 if (istwf_k==2 .and. me_g0==1) direc(:,1) = bkp_dirg0 3108 end if ! useoverlap 3109 3110 call xmpi_sum(scprod,comm,ierr) 3111 end if 3112 3113 if (iband0>0.and.iband0<=nbandm) then 3114 bkp_scprod = scprod(:,iband0) 3115 scprod(:,iband0) = zero 3116 end if 3117 3118 call cg_zgemv("N",npw_sp,nbandm,cg(1,icg+1),scprod,direc,alpha=-cg_cone,beta=cg_cone) 3119 3120 if (iband0>0.and.iband0<=nbandm) scprod(:,iband0) = bkp_scprod ! Restore previous value as scprod is output. 3121 3122 end if ! Test on istwf_k 3123 3124 call timab(210+tim_projbd,2,tsec) 3125 3126 end subroutine projbd
m_cgtools/pw_orthon [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
pw_orthon
FUNCTION
Normalize nvec complex vectors each of length nelem and then orthogonalize by modified Gram-Schmidt. Two orthogonality conditions are available: Simple orthogonality: ${<Vec_{i}|Vec_{j}>=Delta_ij}$ Orthogonality with overlap S: ${<Vec_{i}|S|Vec_{j}>=Delta_ij}$
INPUTS
icg=shift to be given to the location of the data in cg(=vecnm) igsc=shift to be given to the location of the data in gsc(=ovl_vecnm) istwf_k=option parameter that describes the storage of wfs mcg=maximum size of second dimension of cg(=vecnm) mgsc=maximum size of second dimension of gsc(=ovl_vecnm) nelem=number of complex elements in each vector nvec=number of vectors to be orthonormalized ortalgo= option for the choice of the algorithm -1: no orthogonalization (direct return) 0 or 2: old algorithm (use of buffers) 1: new algorithm (use of blas) 3: new new algorithm (use of lapack without copy) useoverlap=select the orthogonality condition 0: no overlap between vectors 1: vectors are overlapping me_g0=1 if this processor has G=0, 0 otherwise comm=MPI communicator
SIDE EFFECTS
vecnm= input: vectors to be orthonormalized; array of nvec column vectors,each of length nelem,shifted by icg This array is complex or else real(dp) of twice length output: orthonormalized set of vectors if (useoverlap==1) only: ovl_vecnm= input: product of overlap and input vectors: S|vecnm>,where S is the overlap operator output: updated S|vecnm> according to vecnm
NOTES
Note that each vector has an arbitrary phase which is not fixed in this routine. WARNING: not yet suited for nspinor=2 with istwfk/=1
SOURCE
4761 subroutine pw_orthon(icg, igsc, istwf_k, mcg, mgsc, nelem, nvec, ortalgo, ovl_vecnm, useoverlap, vecnm, me_g0, comm) 4762 4763 use m_abi_linalg 4764 4765 !Arguments ------------------------------------ 4766 !scalars 4767 integer,intent(in) :: icg,igsc,istwf_k,mcg,mgsc,nelem,nvec,ortalgo,useoverlap,me_g0,comm 4768 !arrays 4769 real(dp),intent(inout) :: ovl_vecnm(2,mgsc*useoverlap),vecnm(2,mcg) 4770 4771 !Local variables------------------------------- 4772 !scalars 4773 integer :: ierr,ii,ii0,ii1,ii2,ivec,ivec2 4774 integer :: rvectsiz,vectsize,cg_idx,gsc_idx 4775 real(dp) :: doti,dotr,sum,xnorm 4776 !real(dp) :: cpu, wall, gflops 4777 #ifdef DEBUG_MODE 4778 character(len=500) :: msg 4779 #endif 4780 !arrays 4781 integer :: cgindex(nvec), gscindex(nvec) 4782 real(dp) :: buffer2(2),tsec(2) 4783 real(dp),allocatable :: rblockvectorbx(:,:),rblockvectorx(:,:),rgramxbx(:,:) 4784 complex(dpc),allocatable :: cblockvectorbx(:,:),cblockvectorx(:,:), cgramxbx(:,:) 4785 4786 ! ************************************************************************* 4787 4788 #ifdef DEBUG_MODE 4789 !Make sure imaginary part at G=0 vanishes 4790 if (istwf_k == 2 .and. me_g0 == 1) then 4791 do ivec=1,nvec 4792 if(abs(vecnm(2,1+nelem*(ivec-1)+icg))>zero)then 4793 ! if(abs(vecnm(2,1+nelem*(ivec-1)+icg))>tol16)then 4794 write(msg,'(2a,3i0,2es16.6,a,a)')& 4795 ' For istwf_k = 2, observed the following element of vecnm :',ch10,& 4796 nelem,ivec,icg,vecnm(1:2,1+nelem*(ivec-1)+icg), ch10,' with a non-negligible imaginary part.' 4797 ABI_BUG(msg) 4798 end if 4799 end do 4800 end if 4801 #endif 4802 4803 ! Nothing to do if ortalgo=-1 4804 if(ortalgo==-1) return 4805 4806 !call wrtout(std_out, sjoin(" Begin wavefunction orthogonalization with ortalgo:", itoa(ortalgo))) 4807 !call cwtime(cpu, wall, gflops, "start") 4808 4809 do ivec=1,nvec 4810 cgindex(ivec)=nelem*(ivec-1)+icg+1 4811 gscindex(ivec)=nelem*(ivec-1)+igsc+1 4812 end do 4813 4814 if (ortalgo==3) then 4815 ! ========================= 4816 ! First (new new) algorithm 4817 ! ========================= 4818 ! NEW VERSION: avoid copies, use ZHERK for NC 4819 cg_idx = cgindex(1) 4820 if (useoverlap == 1) then 4821 gsc_idx = gscindex(1) 4822 call cgpaw_cholesky(nelem, nvec, vecnm(1,cg_idx), ovl_vecnm(1,gsc_idx), istwf_k, me_g0, comm) 4823 else 4824 call cgnc_cholesky(nelem, nvec, vecnm(1,cg_idx), istwf_k, me_g0, comm, use_gemm=.FALSE.) 4825 end if 4826 4827 else if (ortalgo==1) then 4828 ! ======================= 4829 ! Second (new) algorithm 4830 ! ======================= 4831 ! This first algorithm seems to be more efficient especially in the parallel band-FFT mode. 4832 4833 if(istwf_k==1) then 4834 vectsize=nelem 4835 ABI_MALLOC(cgramxbx,(nvec,nvec)) 4836 ABI_MALLOC(cblockvectorx,(vectsize,nvec)) 4837 ABI_MALLOC(cblockvectorbx,(vectsize,nvec)) 4838 call abi_xcopy(nvec*vectsize,vecnm(:,cgindex(1):cgindex(nvec)-1),1,cblockvectorx,1,x_cplx=2) 4839 if (useoverlap == 1) then 4840 call abi_xcopy(nvec*vectsize,ovl_vecnm(:,gscindex(1):gscindex(nvec)-1),1,cblockvectorbx,1,x_cplx=2) 4841 else 4842 call abi_xcopy(nvec*vectsize,vecnm(:,cgindex(1):cgindex(nvec)-1),1,cblockvectorbx,1,x_cplx=2) 4843 end if 4844 call abi_xorthonormalize(cblockvectorx,cblockvectorbx,nvec,comm,cgramxbx,vectsize) 4845 call abi_xcopy(nvec*vectsize,cblockvectorx,1,vecnm(:,cgindex(1):cgindex(nvec)-1),1,x_cplx=2) 4846 if (useoverlap == 1) then 4847 call abi_xtrsm('r','u','n','n',vectsize,nvec,cone,cgramxbx,nvec,cblockvectorbx,vectsize) 4848 call abi_xcopy(nvec*vectsize,cblockvectorbx,1,ovl_vecnm(:,gscindex(1):gscindex(nvec)-1),1,x_cplx=2) 4849 end if 4850 ABI_FREE(cgramxbx) 4851 ABI_FREE(cblockvectorx) 4852 ABI_FREE(cblockvectorbx) 4853 4854 else if (istwf_k==2) then 4855 ! Pack real and imaginary part of the wavefunctions. 4856 rvectsiz=nelem 4857 vectsize=2*nelem; if(me_g0==1) vectsize=vectsize-1 4858 ABI_MALLOC(rgramxbx,(nvec,nvec)) 4859 ABI_MALLOC(rblockvectorx,(vectsize,nvec)) 4860 ABI_MALLOC(rblockvectorbx,(vectsize,nvec)) 4861 do ivec=1,nvec 4862 if (me_g0 == 1) then 4863 call abi_xcopy(1,vecnm(1,cgindex(ivec)),1,rblockvectorx (1,ivec),1) 4864 call abi_xcopy(rvectsiz-1,vecnm(1,cgindex(ivec)+1),2,rblockvectorx(2,ivec),1) 4865 call abi_xcopy(rvectsiz-1,vecnm(2,cgindex(ivec)+1),2,rblockvectorx(rvectsiz+1,ivec),1) 4866 if (useoverlap == 1) then 4867 call abi_xcopy(1,ovl_vecnm(1,gscindex(ivec)),1,rblockvectorbx(1,ivec),1) 4868 call abi_xcopy(rvectsiz-1,ovl_vecnm(1,gscindex(ivec)+1),2,rblockvectorbx(2,ivec),1) 4869 call abi_xcopy(rvectsiz-1,ovl_vecnm(2,gscindex(ivec)+1),2,rblockvectorbx(rvectsiz+1,ivec),1) 4870 else 4871 call abi_xcopy(1,vecnm(1,cgindex(ivec)),1,rblockvectorbx(1,ivec),1) 4872 call abi_xcopy(rvectsiz-1,vecnm(1,cgindex(ivec)+1),2,rblockvectorbx(2,ivec),1) 4873 call abi_xcopy(rvectsiz-1,vecnm(2,cgindex(ivec)+1),2,rblockvectorbx(rvectsiz+1,ivec),1) 4874 end if 4875 rblockvectorx (2:vectsize,ivec)=rblockvectorx (2:vectsize,ivec)*sqrt2 4876 rblockvectorbx(2:vectsize,ivec)=rblockvectorbx(2:vectsize,ivec)*sqrt2 4877 else 4878 call abi_xcopy(rvectsiz,vecnm(1,cgindex(ivec)),2,rblockvectorx(1,ivec),1) 4879 call abi_xcopy(rvectsiz,vecnm(2,cgindex(ivec)),2,rblockvectorx(rvectsiz+1,ivec),1) 4880 if (useoverlap == 1) then 4881 call abi_xcopy(rvectsiz,ovl_vecnm(1,gscindex(ivec)),2,rblockvectorbx(1,ivec),1) 4882 call abi_xcopy(rvectsiz,ovl_vecnm(2,gscindex(ivec)),2,rblockvectorbx(rvectsiz+1,ivec),1) 4883 else 4884 call abi_xcopy(rvectsiz,vecnm(1,cgindex(ivec)),2,rblockvectorbx(1,ivec),1) 4885 call abi_xcopy(rvectsiz,vecnm(2,cgindex(ivec)),2,rblockvectorbx(rvectsiz+1,ivec),1) 4886 end if 4887 rblockvectorx (1:vectsize,ivec)=rblockvectorx (1:vectsize,ivec)*sqrt2 4888 rblockvectorbx(1:vectsize,ivec)=rblockvectorbx(1:vectsize,ivec)*sqrt2 4889 end if 4890 end do 4891 4892 call ortho_reim(rblockvectorx,rblockvectorbx,nvec,comm,rgramxbx,vectsize) 4893 4894 do ivec=1,nvec 4895 ! Unpack results 4896 if (me_g0 == 1) then 4897 call abi_xcopy(1,rblockvectorx(1,ivec),1,vecnm(1,cgindex(ivec)),1) 4898 vecnm(2,cgindex(ivec))=zero 4899 rblockvectorx(2:vectsize,ivec)=rblockvectorx(2:vectsize,ivec)/sqrt2 4900 call abi_xcopy(rvectsiz-1,rblockvectorx(2,ivec),1,vecnm(1,cgindex(ivec)+1),2) 4901 call abi_xcopy(rvectsiz-1,rblockvectorx(rvectsiz+1,ivec),1,vecnm(2,cgindex(ivec)+1),2) 4902 else 4903 rblockvectorx(1:vectsize,ivec)=rblockvectorx(1:vectsize,ivec)/sqrt2 4904 call abi_xcopy(rvectsiz,rblockvectorx(1,ivec),1,vecnm(1,cgindex(ivec)),2) 4905 call abi_xcopy(rvectsiz,rblockvectorx(rvectsiz+1,ivec),1,vecnm(2,cgindex(ivec)),2) 4906 end if 4907 4908 if(useoverlap == 1) then 4909 call abi_xtrsm('r','u','n','n',vectsize,nvec,one,rgramxbx,nvec,rblockvectorbx,vectsize) 4910 if (me_g0 == 1) then 4911 call abi_xcopy(1,rblockvectorbx(1,ivec),1,ovl_vecnm(1,gscindex(ivec)),1) 4912 ovl_vecnm(2,gscindex(ivec))=zero 4913 rblockvectorbx(2:vectsize,ivec)=rblockvectorbx(2:vectsize,ivec)/sqrt2 4914 call abi_xcopy(rvectsiz-1,rblockvectorbx(2,ivec),1,ovl_vecnm(1,gscindex(ivec)+1),2) 4915 call abi_xcopy(rvectsiz-1,rblockvectorbx(rvectsiz+1,ivec),1,ovl_vecnm(2,gscindex(ivec)+1),2) 4916 else 4917 rblockvectorbx(1:vectsize,ivec)=rblockvectorbx(1:vectsize,ivec)/sqrt2 4918 call abi_xcopy(rvectsiz,rblockvectorbx(1,ivec),1,ovl_vecnm(1,gscindex(ivec)),2) 4919 call abi_xcopy(rvectsiz,rblockvectorbx(rvectsiz+1,ivec),1,ovl_vecnm(2,gscindex(ivec)),2) 4920 end if 4921 end if 4922 end do 4923 ABI_FREE(rgramxbx) 4924 ABI_FREE(rblockvectorx) 4925 ABI_FREE(rblockvectorbx) 4926 end if 4927 4928 else if (ortalgo==4) then 4929 ! else if (ANY(ortalgo==(/0,2/))) then 4930 4931 cg_idx = cgindex(1) 4932 if (useoverlap==0) then 4933 call cgnc_gramschmidt(nelem,nvec,vecnm(1,cg_idx),istwf_k,me_g0,comm) 4934 else 4935 gsc_idx = gscindex(1) 4936 call cgpaw_gramschmidt(nelem,nvec,vecnm(1,cg_idx),ovl_vecnm(1,gsc_idx),istwf_k,me_g0,comm) 4937 end if 4938 4939 else if (ANY(ortalgo==(/0,2/))) then 4940 ! ======================= 4941 ! Third (old) algorithm 4942 ! ======================= 4943 ! TODO: This algo should be removed. Ref files should be updated though. 4944 4945 do ivec=1,nvec 4946 ! Normalize each vecnm(n,m) in turn: 4947 4948 if (useoverlap==1) then ! Using overlap S... 4949 if(istwf_k/=2)then 4950 sum=zero;ii0=1 4951 else 4952 if (me_g0 ==1) then 4953 sum=half*ovl_vecnm(1,1+nelem*(ivec-1)+igsc)*vecnm(1,1+nelem*(ivec-1)+icg) 4954 ii0=2 4955 else 4956 sum=zero;ii0=1 4957 end if 4958 end if 4959 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:sum) SHARED(icg,ivec,nelem,vecnm) 4960 do ii=ii0+nelem*(ivec-1),nelem*ivec 4961 sum=sum+vecnm(1,ii+icg)*ovl_vecnm(1,ii+igsc)+vecnm(2,ii+icg)*ovl_vecnm(2,ii+igsc) 4962 end do 4963 4964 else ! Without overlap... 4965 if(istwf_k/=2)then 4966 sum=zero;ii0=1 4967 else 4968 if (me_g0 ==1) then 4969 sum=half*vecnm(1,1+nelem*(ivec-1)+icg)**2 4970 ii0=2 4971 else 4972 sum=zero;ii0=1 4973 end if 4974 end if 4975 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:sum) SHARED(icg,ivec,nelem,vecnm) 4976 do ii=ii0+nelem*(ivec-1)+icg,nelem*ivec+icg 4977 sum=sum+vecnm(1,ii)**2+vecnm(2,ii)**2 4978 end do 4979 end if 4980 4981 call timab(48,1,tsec) 4982 call xmpi_sum(sum,comm,ierr) 4983 call timab(48,2,tsec) 4984 4985 if(istwf_k>=2)sum=two*sum 4986 xnorm = sqrt(abs(sum)) ; sum=1.0_dp/xnorm 4987 !$OMP PARALLEL DO PRIVATE(ii) SHARED(icg,ivec,nelem,sum,vecnm) 4988 do ii=1+nelem*(ivec-1)+icg,nelem*ivec+icg 4989 vecnm(1,ii)=vecnm(1,ii)*sum 4990 vecnm(2,ii)=vecnm(2,ii)*sum 4991 end do 4992 if (useoverlap==1) then 4993 !$OMP PARALLEL DO PRIVATE(ii) SHARED(icg,ivec,nelem,sum,ovl_vecnm) 4994 do ii=1+nelem*(ivec-1)+igsc,nelem*ivec+igsc 4995 ovl_vecnm(1,ii)=ovl_vecnm(1,ii)*sum 4996 ovl_vecnm(2,ii)=ovl_vecnm(2,ii)*sum 4997 end do 4998 end if 4999 5000 ! Remove projection in all higher states. 5001 if (ivec<nvec) then 5002 5003 if(istwf_k==1)then 5004 ! Cannot use time-reversal symmetry 5005 5006 if (useoverlap==1) then ! Using overlap. 5007 do ivec2=ivec+1,nvec 5008 ! First compute scalar product 5009 dotr=zero ; doti=zero 5010 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+igsc 5011 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:doti,dotr) SHARED(ii1,ii2,nelem,vecnm) 5012 do ii=1,nelem 5013 dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii) 5014 doti=doti+vecnm(1,ii1+ii)*ovl_vecnm(2,ii2+ii)-vecnm(2,ii1+ii)*ovl_vecnm(1,ii2+ii) 5015 end do 5016 5017 call timab(48,1,tsec) 5018 buffer2(1)=doti;buffer2(2)=dotr 5019 call xmpi_sum(buffer2,comm,ierr) 5020 call timab(48,2,tsec) 5021 doti=buffer2(1) 5022 dotr=buffer2(2) 5023 5024 ! Then subtract the appropriate amount of the lower state 5025 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg 5026 #ifdef FC_INTEL 5027 ! DIR$ ivdep 5028 #endif 5029 !$OMP PARALLEL DO PRIVATE(ii) SHARED(doti,dotr,ii1,ii2,nelem,vecnm) 5030 do ii=1,nelem 5031 vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)+doti*vecnm(2,ii1+ii) 5032 vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-doti*vecnm(1,ii1+ii)-dotr*vecnm(2,ii1+ii) 5033 end do 5034 5035 ii1=nelem*(ivec-1)+igsc;ii2=nelem*(ivec2-1)+igsc 5036 do ii=1,nelem 5037 ovl_vecnm(1,ii2+ii)=ovl_vecnm(1,ii2+ii)& 5038 & -dotr*ovl_vecnm(1,ii1+ii)& 5039 & +doti*ovl_vecnm(2,ii1+ii) 5040 ovl_vecnm(2,ii2+ii)=ovl_vecnm(2,ii2+ii)& 5041 -doti*ovl_vecnm(1,ii1+ii)& 5042 & -dotr*ovl_vecnm(2,ii1+ii) 5043 end do 5044 end do 5045 else 5046 ! ----- No overlap ----- 5047 do ivec2=ivec+1,nvec 5048 ! First compute scalar product 5049 dotr=zero ; doti=zero 5050 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg 5051 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:doti,dotr) SHARED(ii1,ii2,nelem,vecnm) 5052 do ii=1,nelem 5053 dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+& 5054 & vecnm(2,ii1+ii)*vecnm(2,ii2+ii) 5055 doti=doti+vecnm(1,ii1+ii)*vecnm(2,ii2+ii)-& 5056 & vecnm(2,ii1+ii)*vecnm(1,ii2+ii) 5057 end do 5058 ! Init mpi_comm 5059 buffer2(1)=doti 5060 buffer2(2)=dotr 5061 call timab(48,1,tsec) 5062 call xmpi_sum(buffer2,comm,ierr) 5063 ! call xmpi_sum(doti,spaceComm,ierr) 5064 ! call xmpi_sum(dotr,spaceComm,ierr) 5065 call timab(48,2,tsec) 5066 doti=buffer2(1) 5067 dotr=buffer2(2) 5068 5069 ! Then subtract the appropriate amount of the lower state 5070 #ifdef FC_INTEL 5071 ! DIR$ ivdep 5072 #endif 5073 !$OMP PARALLEL DO PRIVATE(ii) SHARED(doti,dotr,ii1,ii2,nelem,vecnm) 5074 do ii=1,nelem 5075 vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)+& 5076 & doti*vecnm(2,ii1+ii) 5077 vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-doti*vecnm(1,ii1+ii)-& 5078 & dotr*vecnm(2,ii1+ii) 5079 end do 5080 end do 5081 5082 end if ! Test on useoverlap 5083 5084 else if(istwf_k==2)then 5085 ! At gamma point use of time-reversal symmetry saves cpu time. 5086 5087 if (useoverlap==1) then 5088 ! ----- Using overlap ----- 5089 do ivec2=ivec+1,nvec 5090 ! First compute scalar product 5091 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+igsc 5092 if (me_g0 ==1) then 5093 dotr=half*vecnm(1,ii1+1)*ovl_vecnm(1,ii2+1) 5094 ! Avoid double counting G=0 contribution 5095 ! Imaginary part of vecnm at G=0 should be zero,so only take real part 5096 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm) 5097 do ii=2,nelem 5098 dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+& 5099 & vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii) 5100 end do 5101 else 5102 dotr=0._dp 5103 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm) 5104 do ii=1,nelem 5105 dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+& 5106 & vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii) 5107 end do 5108 end if 5109 5110 dotr=two*dotr 5111 5112 call timab(48,1,tsec) 5113 call xmpi_sum(dotr,comm,ierr) 5114 call timab(48,2,tsec) 5115 5116 ! Then subtract the appropriate amount of the lower state 5117 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg 5118 #ifdef FC_INTEL 5119 ! DIR$ ivdep 5120 #endif 5121 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm) 5122 do ii=1,nelem 5123 vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii) 5124 vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii) 5125 end do 5126 ii1=nelem*(ivec-1)+igsc;ii2=nelem*(ivec2-1)+igsc 5127 do ii=1,nelem 5128 ovl_vecnm(1,ii2+ii)=ovl_vecnm(1,ii2+ii)-dotr*ovl_vecnm(1,ii1+ii) 5129 ovl_vecnm(2,ii2+ii)=ovl_vecnm(2,ii2+ii)-dotr*ovl_vecnm(2,ii1+ii) 5130 end do 5131 end do 5132 else 5133 ! ----- No overlap ----- 5134 do ivec2=ivec+1,nvec 5135 ! First compute scalar product 5136 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg 5137 if (me_g0 ==1) then 5138 ! Avoid double counting G=0 contribution 5139 ! Imaginary part of vecnm at G=0 should be zero,so only take real part 5140 dotr=half*vecnm(1,ii1+1)*vecnm(1,ii2+1) 5141 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm) 5142 do ii=2,nelem 5143 dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*vecnm(2,ii2+ii) 5144 end do 5145 else 5146 dotr=0._dp 5147 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm) 5148 do ii=1,nelem 5149 dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*vecnm(2,ii2+ii) 5150 end do 5151 end if 5152 dotr=two*dotr 5153 5154 call timab(48,1,tsec) 5155 call xmpi_sum(dotr,comm,ierr) 5156 call timab(48,2,tsec) 5157 5158 ! Then subtract the appropriate amount of the lower state 5159 #ifdef FC_INTEL 5160 ! DIR$ ivdep 5161 #endif 5162 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm) 5163 do ii=1,nelem 5164 vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii) 5165 vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii) 5166 end do 5167 end do 5168 end if ! Test on useoverlap 5169 5170 else 5171 ! At other special points,use of time-reversal symmetry saves cpu time. 5172 5173 if (useoverlap==1) then 5174 ! ----- Using overlap ----- 5175 do ivec2=ivec+1,nvec 5176 ! First compute scalar product 5177 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+igsc 5178 ! Avoid double counting G=0 contribution 5179 ! Imaginary part of vecnm at G=0 should be zero,so only take real part 5180 dotr=zero 5181 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm) 5182 do ii=1,nelem 5183 dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii) 5184 end do 5185 dotr=two*dotr 5186 5187 call timab(48,1,tsec) 5188 call xmpi_sum(dotr,comm,ierr) 5189 call timab(48,2,tsec) 5190 5191 ! Then subtract the appropriate amount of the lower state 5192 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg 5193 #ifdef FC_INTEL 5194 ! DIR$ ivdep 5195 #endif 5196 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm) 5197 do ii=1,nelem 5198 vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii) 5199 vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii) 5200 end do 5201 ii1=nelem*(ivec-1)+igsc;ii2=nelem*(ivec2-1)+igsc 5202 do ii=1,nelem 5203 ovl_vecnm(1,ii2+ii)=ovl_vecnm(1,ii2+ii)-dotr*ovl_vecnm(1,ii1+ii) 5204 ovl_vecnm(2,ii2+ii)=ovl_vecnm(2,ii2+ii)-dotr*ovl_vecnm(2,ii1+ii) 5205 end do 5206 end do 5207 else 5208 ! ----- No overlap ----- 5209 do ivec2=ivec+1,nvec 5210 ! First compute scalar product 5211 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg 5212 ! Avoid double counting G=0 contribution 5213 ! Imaginary part of vecnm at G=0 should be zero,so only take real part 5214 dotr=zero 5215 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm) 5216 do ii=1,nelem 5217 dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*vecnm(2,ii2+ii) 5218 end do 5219 dotr=two*dotr 5220 5221 call timab(48,1,tsec) 5222 call xmpi_sum(dotr,comm,ierr) 5223 call timab(48,2,tsec) 5224 5225 ! Then subtract the appropriate amount of the lower state 5226 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm) 5227 do ii=1,nelem 5228 vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii) 5229 vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii) 5230 end do 5231 end do 5232 end if 5233 5234 end if ! End use of time-reversal symmetry 5235 end if ! Test on "ivec" 5236 end do ! end loop over vectors (or bands) with index ivec : 5237 5238 else 5239 ABI_ERROR(sjoin("Wrong value for ortalgo:", itoa(ortalgo))) 5240 end if 5241 5242 !call cwtime_report(sjoin(" pw_orthon with ortalgo: ", itoa(ortalgo)), cpu, wall, gflops) 5243 5244 end subroutine pw_orthon
m_cgtools/pw_orthon_paw [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
pw_orthon_paw
FUNCTION
Normalize nvec complex vectors each of length nelem and then orthogonalize by modified Gram-Schmidt. The overlap matrix <c_m|S|c_n> (S can be identity) has to be provided as input, and is overwritten.
INPUTS
icg=shift to be given to the location of the data in cg(=vecnm) mcg=maximum size of second dimension of cg(=vecnm) nelem=number of complex elements in each vector nspinor=number of spinorial components of the wavefunctions (on current proc) nvec=number of vectors to be orthonormalized ortalgo= option for the choice of the algorithm -1: no orthogonalization (direct return) 0: do orthogonalization comm=MPI communicator
SIDE EFFECTS
cprj(optional)=<p_i|c_n> coefficients, updated to keep them consistent with the WF at output ovl_mat=overlap matrix <c_m|S|c_n> for m<=n vecnm= input: vectors to be orthonormalized; array of nvec column vectors,each of length nelem,shifted by icg This array is complex or else real(dp) of twice length output: orthonormalized set of vectors
NOTES
Note that each vector has an arbitrary phase which is not fixed in this routine.
SOURCE
5279 subroutine pw_orthon_cprj(icg,mcg,nelem,nspinor,nvec,ortalgo,ovl_mat,vecnm,cprj) 5280 5281 use m_abi_linalg 5282 5283 !Arguments ------------------------------------ 5284 !scalars 5285 integer,intent(in) :: icg,mcg,nelem,nspinor,nvec,ortalgo 5286 !arrays 5287 real(dp),intent(inout) :: ovl_mat(nvec*(nvec+1)),vecnm(2,mcg) 5288 type(pawcprj_type),intent(inout),optional,target :: cprj(:,:) 5289 5290 !Local variables------------------------------- 5291 !scalars 5292 logical :: do_cprj 5293 integer :: ii,ii1,ii2,ivec,ivec2,ivec3,iv1l,iv2l,iv3l,iv1r,iv2r,iv3r,ncprj 5294 real(dp) :: doti,dotr,summ,xnorm 5295 !arrays 5296 real(dp) :: ovl_row_tmp(2*nvec),ovl_col_tmp(2*nvec) 5297 real(dp) :: re,im 5298 5299 ! ************************************************************************* 5300 5301 !Nothing to do if ortalgo=-1 5302 if(ortalgo==-1) return 5303 5304 do_cprj=.false. 5305 if (present(cprj)) then 5306 do_cprj=.true. 5307 ncprj = size(cprj,2) 5308 if (ncprj/=nspinor*nvec) then 5309 ABI_ERROR('bad size for cprj') 5310 end if 5311 end if 5312 5313 ! The overlap matrix is : ovl(i,j) = <psi_i|S|psi_j> = (<psi_j|S|psi_i>)^* 5314 ! The row index stands for the "left" band index 5315 ! The column index stands for the "right" band index 5316 ! Only the upper triangular part of the (complex) overlap matrix is stored, so only elements with i<=j. 5317 ! They are stored in the following order: ovl(1,1),ovl(1,2),ovl(2,2),ovl(1,3),ovl(2,3),... 5318 ! so: 5319 ! -- shift for the ith row : 2.(i.(i-1)/2) = i.(i-1) 5320 ! -- shift for the ith column : 2.(i-1)+1 = 2.i-1 5321 ! => index of real part of elem in the jth column and ith row (=ovl(i,j)) : 2.i-1+j.(j-1) (for i<=j) 5322 ! => index of imaginary part = index of real part + 1 5323 ! After orthogonalizing the first n vectors, we have: 5324 ! for i<=n, i<=j : ovl(i,j) = delta_ij 5325 5326 do ivec=1,nvec 5327 5328 ! First we normalize the current vector 5329 iv1r = ivec*(ivec-1) ! ith row 5330 iv1l = 2*ivec-1 ! ith column 5331 ! ovl(i1,i1) = <psi_i1|S|psi_i1> 5332 summ = ovl_mat(iv1r+iv1l) 5333 xnorm = sqrt(abs(summ)) ; summ=1.0_dp/xnorm 5334 !$OMP PARALLEL DO PRIVATE(ii) SHARED(icg,ivec,nelem,summ,vecnm) 5335 do ii=1+nelem*(ivec-1)+icg,nelem*ivec+icg 5336 vecnm(1,ii)=vecnm(1,ii)*summ 5337 vecnm(2,ii)=vecnm(2,ii)*summ 5338 end do 5339 ! Apply the normalization to cprj coeffs 5340 if (do_cprj) call pawcprj_axpby(zero,summ,cprj(:,nspinor*(ivec-1)+1:nspinor*ivec),cprj(:,nspinor*(ivec-1)+1:nspinor*ivec)) 5341 5342 ! As the norm of |psi_i1> changed, we update the overlap matrix accordingly. 5343 ! From previous iterations, we already have: 5344 ! ovl(i2,i1) = <psi_i2|S|psi_i1> = 0 for i2<i1 5345 ! so we need to change only: 5346 ! ovl(i1,i2) = <psi_i1|S|psi_i2> for i1<=i2 5347 do ivec2=ivec,nvec 5348 iv2r=ivec2*(ivec2-1) 5349 if (ivec<ivec2) then 5350 ovl_mat(iv2r+iv1l ) = ovl_mat(iv2r+iv1l )*summ 5351 ovl_mat(iv2r+iv1l+1) = ovl_mat(iv2r+iv1l+1)*summ 5352 else if (ivec==ivec2) then 5353 ovl_mat(iv2r+iv1l ) = ovl_mat(iv2r+iv1l )*summ*summ 5354 ovl_mat(iv2r+iv1l+1) = ovl_mat(iv2r+iv1l+1)*summ*summ 5355 re = ovl_mat(iv2r+iv1l ) 5356 im = ovl_mat(iv2r+iv1l+1) 5357 if (abs(re-1)>tol10.or.abs(im)>tol10) then 5358 write(std_out,'(a,es21.10e3)') '(pw_ortho) ovl (re)',re 5359 write(std_out,'(a,es21.10e3)') '(pw_ortho) ovl (im)',im 5360 ABI_WARNING('In pw_orthon_cprj : the result should be equal to one!') 5361 end if 5362 end if 5363 end do 5364 5365 ! Remove projection in all higher states. 5366 if (ivec<nvec) then 5367 5368 do ivec2=ivec+1,nvec 5369 5370 iv2r = ivec2*(ivec2-1) 5371 iv2l = 2*ivec2-1 5372 ! (dotr,doti) = <psi_i1|S|psi_i2> 5373 dotr = ovl_mat(iv2r+iv1l ) 5374 doti = ovl_mat(iv2r+iv1l+1) 5375 5376 ! Then subtract the appropriate amount of the lower state 5377 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg 5378 ! |psi'_i2> = |psi_i2> - <psi_i1|S|psi_i2> |psi_i1> 5379 !$OMP PARALLEL DO PRIVATE(ii) SHARED(doti,dotr,ii1,ii2,nelem,vecnm) 5380 do ii=1,nelem 5381 vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)+doti*vecnm(2,ii1+ii) 5382 vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-doti*vecnm(1,ii1+ii)-dotr*vecnm(2,ii1+ii) 5383 end do 5384 if (do_cprj) call pawcprj_zaxpby((/-dotr,-doti/),(/one,zero/),cprj(:,nspinor*(ivec-1)+1:nspinor*ivec),& 5385 & cprj(:,nspinor*(ivec2-1)+1:nspinor*ivec2)) 5386 ! As |psi_i2> changed, we update the overlap matrix accordingly. 5387 ! We have: <psi'_i3|S|psi'_i2> = <psi'_i3|S|psi_i2> - <psi_i1|S|psi_i2> <psi'_i3|S|psi_i1> 5388 ! Remember that i2>i1. 5389 ! For i3<=i2, we compute the new column i2. 5390 ! For i3<i1: 5391 ! (1) <psi'_i3|S|psi'_i2> = <psi_i3|S|psi_i2> - <psi_i1|S|psi_i2> <psi_i3|S|psi_i1> 5392 ! = <psi_i3|S|psi_i2> 5393 ! as for i3<i1 we have <psi_i3|S|psi_i1> = 0 5394 ! For i1<=i3<i2: 5395 ! (2) <psi'_i3|S|psi'_i2> = <psi_i3|S|psi_i2> - <psi_i1|S|psi_i2> <psi_i3|S|psi_i1> 5396 ! = <psi_i3|S|psi_i2> - <psi_i1|S|psi_i2> (<psi_i1|S|psi_i3>)^* 5397 ! For i3=i2: 5398 ! (3) <psi'_i3|S|psi'_i2> = <psi'_i2|S|psi_i2> - <psi_i1|S|psi_i2> <psi'_i2|S|psi_i1> 5399 ! = <psi_i2|S|psi_i2> - <psi_i1|S|psi_i2> <psi_i2|S|psi_i1> 5400 ! - <psi_i1|S|psi_i2> <psi_i2|S|psi_i1> + <psi_i1|S|psi_i2> <psi_1|S|psi_1> <psi_i2|S|psi_i1> 5401 ! = <psi_i2|S|psi_i2> - <psi_i1|S|psi_i2> <psi_i2|S|psi_i1> 5402 ! = <psi_i2|S|psi_i2> - <psi_i1|S|psi_i2> (<psi_i1|S|psi_i2>)^* 5403 ! so the case i3=i2 (3) is equivalent to the case i1<=i3<i2 (2) with i3=i2. 5404 ! Here we compute (2) and (3) in a temporary array: 5405 do ivec3=ivec,ivec2 5406 iv3r=ivec3*(ivec3-1) 5407 iv3l=2*ivec3-1 5408 ovl_col_tmp(iv3l ) = ovl_mat(iv2r+iv3l ) - dotr*ovl_mat(iv3r+iv1l) - doti*ovl_mat(iv3r+iv1l+1) 5409 ovl_col_tmp(iv3l+1) = ovl_mat(iv2r+iv3l+1) - doti*ovl_mat(iv3r+iv1l) + dotr*ovl_mat(iv3r+iv1l+1) 5410 end do 5411 ! For i2<i3, we compute the new row i2. 5412 ! (4) <psi'_i2|S|psi_i3> = <psi_i2|S|psi_i3> - <psi_i2|S|psi_i1> <psi_i1|S|psi_i3> 5413 ! = <psi_i2|S|psi_i3> - (<psi_i1|S|psi_i2>)^* <psi_i1|S|psi_i3> 5414 ! Here we compute (4) in a temporary array: 5415 do ivec3=ivec2+1,nvec 5416 iv3r=ivec3*(ivec3-1) 5417 iv3l=2*ivec3-1 5418 ovl_row_tmp(iv3l ) = ovl_mat(iv3r+iv2l ) - dotr*ovl_mat(iv3r+iv1l) - doti*ovl_mat(iv3r+iv1l+1) 5419 ovl_row_tmp(iv3l+1) = ovl_mat(iv3r+iv2l+1) + doti*ovl_mat(iv3r+iv1l) - dotr*ovl_mat(iv3r+iv1l+1) 5420 end do 5421 ! We update the column i2 (starting from ivec and not 1, thanks to (1)) 5422 do ivec3=ivec,ivec2 5423 iv3l=2*ivec3-1 5424 ovl_mat(iv2r+iv3l ) = ovl_col_tmp(iv3l ) 5425 ovl_mat(iv2r+iv3l+1) = ovl_col_tmp(iv3l+1) 5426 end do 5427 ! We update the row i2 5428 do ivec3=ivec2+1,nvec 5429 iv3r=ivec3*(ivec3-1) 5430 iv3l=2*ivec3-1 5431 ovl_mat(iv3r+iv2l ) = ovl_row_tmp(iv3l ) 5432 ovl_mat(iv3r+iv2l+1) = ovl_row_tmp(iv3l+1) 5433 end do 5434 end do 5435 5436 end if ! Test on "ivec" 5437 5438 !end loop over vectors (or bands) with index ivec : 5439 end do 5440 5441 end subroutine pw_orthon_cprj
m_cgtools/set_istwfk [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
set_istwfk
FUNCTION
Returns the value of istwfk associated to the input k-point.
INPUTS
kpoint(3)=The k-point in reduced coordinates.
OUTPUT
istwfk= Integer flag internally used in the code to define the storage mode of the wavefunctions. It also define the algorithm used to apply an operator in reciprocal space as well as the FFT algorithm used to go from G- to r-space and vice versa. 1 => time-reversal cannot be used 2 => use time-reversal at the Gamma point. 3 => use time-reversal symmetry for k=(1/2, 0 , 0 ) 4 => use time-reversal symmetry for k=( 0 , 0 ,1/2) 5 => use time-reversal symmetry for k=(1/2, 0 ,1/2) 6 => use time-reversal symmetry for k=( 0 ,1/2, 0 ) 7 => use time-reversal symmetry for k=(1/2,1/2, 0 ) 8 => use time-reversal symmetry for k=( 0 ,1/2,1/2) 9 => use time-reversal symmetry for k=(1/2,1/2,1/2) Useful relations: u_k(G) = u_{k+G0}(G-G0); u_{-k}(-G) = u_k(G)^* and therefore: u_{G0/2}(G) = u_{G0/2}(-G-G0)^*.
SOURCE
902 integer pure function set_istwfk(kpoint) result(istwfk) 903 904 !Arguments ------------------------------------ 905 real(dp),intent(in) :: kpoint(3) 906 907 !Local variables------------------------------- 908 !scalars 909 integer :: bit0,ii 910 !arrays 911 integer :: bit(3) 912 913 ! ************************************************************************* 914 915 bit0=1 916 917 do ii=1,3 918 if (DABS(kpoint(ii))<tol10) then 919 bit(ii)=0 920 else if (DABS(kpoint(ii)-half)<tol10 ) then 921 bit(ii)=1 922 else 923 bit0=0 924 end if 925 end do 926 927 if (bit0==0) then 928 istwfk=1 929 else 930 istwfk=2+bit(1)+4*bit(2)+2*bit(3) ! Note the inversion between bit(2) and bit(3) 931 end if 932 933 end function set_istwfk
m_cgtools/sqnorm_g [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
sqnorm_g
FUNCTION
Compute the square of the norm of one complex vector vecti, in reciprocal space Take into account the storage mode of the vector (istwf_k)
INPUTS
istwf_k=option parameter that describes the storage of wfs npwsp= (effective) number of planewaves at this k point. vect(2,npwsp)=the vector in reciprocal space (npw*nspinor, usually) me_g0=1 if this processors treats G=0, 0 otherwise. comm=MPI communicator for MPI sum.
OUTPUT
dotr= <vect|vect>
SOURCE
956 subroutine sqnorm_g(dotr, istwf_k, npwsp, vect, me_g0, comm) 957 958 !Arguments ------------------------------------ 959 !scalars 960 integer,intent(in) :: istwf_k,npwsp,me_g0,comm 961 real(dp),intent(out) :: dotr 962 !arrays 963 real(dp),intent(in) :: vect(2,npwsp) 964 965 !Local variables------------------------------- 966 !scalars 967 integer :: ierr 968 969 ! ************************************************************************* 970 971 if (istwf_k==1) then ! General k-point 972 !dotr = cg_real_zdotc(npwsp,vect,vect) 973 dotr = cg_dznrm2(npwsp, vect) 974 dotr = dotr * dotr 975 976 else 977 if (istwf_k == 2 .and. me_g0 == 1) then 978 ! Gamma k-point and I have G=0 979 dotr=half*vect(1,1)**2 980 dotr = dotr + cg_real_zdotc(npwsp-1, vect(1,2), vect(1,2)) 981 else 982 ! Other TR k-points 983 dotr = cg_real_zdotc(npwsp, vect, vect) 984 end if 985 dotr=two*dotr 986 end if 987 988 if (xmpi_comm_size(comm)>1) call xmpi_sum(dotr,comm,ierr) 989 990 end subroutine sqnorm_g
m_cgtools/sqnorm_v [ Functions ]
[ Top ] [ m_cgtools ] [ Functions ]
NAME
sqnorm_v
FUNCTION
Compute square of the norm of a potential (integral over FFT grid), to obtain a square residual-like quantity (so the sum of product of values is NOT divided by the number of FFT points, and NOT multiplied by the primitive cell volume). Take into account the spin components of the potentials (nspden), and sum over them.
INPUTS
cplex=if 1, real space function on FFT grid is REAL, if 2, COMPLEX nfft= (effective) number of FFT grid points (for this processor) nspden=number of spin-density components opt_storage: 0, if potential is stored as V^up-up, V^dn-dn, Re[V^up-dn], Im[V^up-dn] 1, if potential is stored as V, B_x, B_y, Bz (B=magn. field) pot(cplex*nfft,nspden)=real space potential on FFT grid
OUTPUT
norm2= value of the square of the norm
SOURCE
1599 subroutine sqnorm_v(cplex,nfft,norm2,nspden,opt_storage,pot,mpi_comm_sphgrid) 1600 1601 !Arguments ------------------------------------ 1602 !scalars 1603 integer,intent(in) :: cplex,nfft,nspden,opt_storage 1604 integer,intent(in),optional :: mpi_comm_sphgrid 1605 real(dp),intent(out) :: norm2 1606 !arrays 1607 real(dp),intent(in) :: pot(cplex*nfft,nspden) 1608 1609 !Local variables------------------------------- 1610 !scalars 1611 integer :: ierr,ifft,ispden,nproc_sphgrid 1612 real(dp) :: ar 1613 1614 ! ************************************************************************* 1615 1616 !Real or complex inputs are coded 1617 1618 norm2=zero 1619 do ispden=1,min(nspden,2) 1620 !$OMP PARALLEL DO PRIVATE(ifft) SHARED(cplex,ispden,nfft,pot) REDUCTION(+:norm2) 1621 do ifft=1,cplex*nfft 1622 norm2=norm2 + pot(ifft,ispden)**2 1623 end do 1624 end do 1625 if (nspden==4) then 1626 ar=zero 1627 do ispden=3,4 1628 !$OMP PARALLEL DO PRIVATE(ifft) SHARED(cplex,ispden,nfft,pot) REDUCTION(+:ar) 1629 do ifft=1,cplex*nfft 1630 ar=ar + pot(ifft,ispden)**2 1631 end do 1632 end do 1633 if (opt_storage==0) then 1634 if (cplex==1) then 1635 norm2=norm2+two*ar 1636 else 1637 norm2=norm2+ar 1638 end if 1639 else 1640 norm2=half*(norm2+ar) 1641 end if 1642 end if 1643 1644 !MPIWF reduction (addition) on norm2 is needed here 1645 if(present(mpi_comm_sphgrid)) then 1646 nproc_sphgrid=xmpi_comm_size(mpi_comm_sphgrid) 1647 if(nproc_sphgrid>1)then 1648 call xmpi_sum(norm2,mpi_comm_sphgrid,ierr) 1649 end if 1650 end if 1651 1652 end subroutine sqnorm_v