ABINIT/m_gwls_hamiltonian [ Modules ]


NAME

 m_gwls_hamiltonian

FUNCTION

 Sets up and applies the Hamiltonian and related operators (valence projector, Coulomb kernel,
 preconditioner) for GWLS calculations (optdriver 66), with support for band+FFT MPI parallelism.

COPYRIGHT

 Copyright (C) 2009-2024 ABINIT group (JLJ, BR, MC)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

16 #if defined HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include "abi_common.h"
21 
22 
23 module m_gwls_hamiltonian
24 
25 ! local modules
26 use m_gwls_utility
27 use m_gwls_wf
28 use m_dtset
29 
30 ! abinit modules
31 use m_bandfft_kpt
32 use m_cgtools
33 use defs_basis
34 use m_abicore
35 use m_xmpi
36 use m_pawang
37 use m_errors
38 use m_ab7_mixing
39 use m_mpinfo
40 use m_crystal
41 
42 use defs_abitypes,      only : MPI_type
43 use m_io_tools,         only : get_unit
44 use m_hamiltonian,      only : gs_hamiltonian_type, copy_hamiltonian
45 use m_pawcprj,          only : pawcprj_type
46 use m_vcoul,            only : vcoul_t
47 use m_gsphere,          only : gsphere_t
48 use m_bz_mesh,          only : kmesh_t, find_qmesh
49 use m_fft,              only : fftpac, fourwf
50 use m_getghc,           only : getghc
51 use m_io_kss,           only : make_gvec_kss
52 
53 implicit none
54 save
55 private

m_gwls_hamiltonian/build_H [ Functions ]


NAME

  build_H

FUNCTION

  Initializes the module: copies the dataset, MPI and Hamiltonian data structures received from
  the ground-state calculation, sets up the band+FFT parallel data distributions, allocates the
  module work arrays, builds the crystal, k-mesh and Coulomb (vc_sqrt) objects, and checks that
  the Hamiltonian commutes with the valence projector to the requested precision (tolwfr).

INPUTS

OUTPUT

SOURCE

1508 subroutine build_H(dtset2,mpi_enreg2,cpopt2,cg2,gs_hamk2,kg_k2,kinpw2)
1509 
1510 !use m_bandfft_kpt
1511 use m_cgtools
1512 use m_wfutils
1513 
1514 !Arguments of gw_sternheimer, received as arguments by build_H-------------------------
1515 type(dataset_type),  intent(in) :: dtset2
1516 type(MPI_type),   intent(in) :: mpi_enreg2
1517 type(gs_hamiltonian_type), intent(inout) :: gs_hamk2
1518 
1519 integer, intent(in) :: cpopt2
1520 integer, intent(in) :: kg_k2(3,gs_hamk2%npw_k)
1521 !Since mcg = npw_k*nband when only the Gamma point is used, the next line works. Beyond Gamma-only calculations, mcg is the proper size of cg.
1522 real(dp), intent(in) :: cg2(2,dtset2%mpw*dtset2%nspinor*dtset2%mband*dtset2%mkmem*dtset2%nsppol)
1523 real(dp), intent(in) :: kinpw2(gs_hamk2%npw_k)
1524 !Local variables : most of them are in the module for now.
1525 real(dp) :: commutation_error
1526 integer  :: io_unit
1527 integer  :: io_unit_debug
1528 integer  :: ierr
1529 integer  :: cplx
1530 integer  :: j, k
1531 integer  :: dimph3d
1532 integer  :: mb
1533 
1534 integer  :: mpi_communicator
1535 
1536 character(128):: filename_debug
1537 
1538 
1539 real(dp), allocatable :: wfk_tmp1(:,:) ,wfk_tmp2(:,:)
1540 
1541 ! *************************************************************************
1542 
1543 ! Hartree-Fock cannot be used with GWLS.
1544 if(dtset2%usefock==1 .and. associated(gs_hamk2%fockcommon)) then
1545   ABI_ERROR(' build_H : Hartree-Fock option cannot be used with optdriver==66 (GWLS calculations).')
1546 end if
1547 
1548 !First we copy the data structure types
1549 dtset = dtset2%copy()
1550 
1551 call copy_mpi_enreg(mpi_enreg2,mpi_enreg)
1552 
1553 call copy_hamiltonian(gs_hamk,gs_hamk2)
1554 
1555 !Then we copy the standard types
1556 cpopt   = cpopt2
1557 npw_k = gs_hamk2%npw_k
1558 
1559 ABI_MALLOC(cg,(2,dtset%mpw*dtset%nspinor*dtset%mband*dtset%mkmem*dtset%nsppol))
1560 cg = cg2
1561 
1562 ABI_MALLOC(vlocal,(gs_hamk2%n4,gs_hamk2%n5,gs_hamk2%n6,gs_hamk2%nvloc))
1563 vlocal = gs_hamk2%vlocal
1564 gs_hamk%vlocal => vlocal
1565 
1566 ABI_MALLOC(kg_k,(3,npw_k))
1567 kg_k = kg_k2
1568 ABI_MALLOC(kinpw,(npw_k))
1569 kinpw = kinpw2
1570 
1571 dimffnl=0; if (blocksize<=1) dimffnl=size(gs_hamk2%ffnl_k,2)
1572 ABI_MALLOC(ffnl,(npw_k,dimffnl,gs_hamk%lmnmax,gs_hamk%ntypat))
1573 dimph3d=0; if (blocksize<=1) dimph3d=gs_hamk2%matblk
1574 ABI_MALLOC(ph3d,(2,npw_k,dimph3d))
1575 
1576 !Initializing variables from dataset
1577 nfft  =  dtset%nfft
1578 nline = dtset%nline
1579 tolwfr = dtset%tolwfr
1580 ngfft  = dtset%ngfft
1581 mcg = dtset%mpw*dtset%nspinor*dtset%mband*dtset%mkmem*dtset%nsppol
1582 nspinor=dtset%nspinor
1583 n1=ngfft(1)
1584 n2=ngfft(2)
1585 n3=ngfft(3)
1586 n4=ngfft(4)
1587 n5=ngfft(5)
1588 n6=ngfft(6)
1589 mgfft=maxval(ngfft(1:3))
1590 nbandv = int(dtset%nelect)/2
1591 ABI_MALLOC(istwfk,(dtset%nkpt))
1592 istwfk(:)=dtset%istwfk
1593 
1594 !Initializing variables from gs_hamk
1595 ucvol = gs_hamk%ucvol
1596 ABI_MALLOC(gbound,(2*mgfft+8,2))
1597 !gbound = gs_hamk%gbound_k !Must be done later for band+FFT parallelism
1598 
1599 !Parameters which need to be set by hand for now...
1600 weight           = 1           ! The weight of the k-pts, which sum to 1.
1601 tim_fourwf       = 0
1602 ckpt             = 1           ! The kpt of the wf to FFT. We only consider gamma point for now.
1603 
1604 ! ndat = 1 used to be hard-coded, which works fine for FFT-only parallelism.
1605 ndat              = 1                                                ! # of FFTs to do in parallel in fourwf
1606 
1607 eshift           = 0.0         ! For PAW, opt_sij=-1 gives <G|H-lambda.S|C> in ghc instead of <G|H|C>
1608 sij_opt          = 0           ! Option in PAW to tell getghc what to compute...
1609 tim_getghc       = 0           ! timing code of the calling subroutine (can be set to 0 if not attributed)
1610 type_calc        = 0           ! option governing which part of the Hamiltonian is to be applied: 0: whole Hamiltonian
1611 nband            = dtset%mband
1612 ispden           = 1           !When required, the spin index to be used. We don't support spin-polarized calculations yet...
1613 
1614 
1615 
1616 !========================================================================================================================
1617 ! Trying to implement band parallelism (Bruno, 14/10/2013)
1618 !------------------------------------------------------------------------------------------------------------------------
1619 !
1620 ! Band parallelism is described in the article on the LOBPCG method by F. Bottin et al,
1621 ! "Large scale ab initio calculations based on three levels of parallelisation", Computational Materials Science 42 (2008) 329-336
1622 !
1623 ! The article describes the ABINIT implementation specifically, suggesting that the information there is directly related
1624 ! to the code.
1625 !
1626 ! The main points in order to understand band + FFT parallelism are:
1627 !
1628 !     1) The processors are arranged in a 2D Cartesian MPI topology, of dimensions M x P, with
1629 !                              M  = nproc_bands
1630 !                              P  = nproc_fft
1631 !        There are of course M x P processors
1632 !
1633 !     2) the wavefunction coefficients (namely the array cg) are distributed on these processors. Unfortunately,
1634 !        two different distribution schemes are necessary: one for linear algebra (matrix products, etc), and one
1635 !        consistent with the Goedecker FFTs. The code must thus change the data distribution back and forth.
1636 !
1637 !     3) the "linear algebra" distribution describes the array cg directly. The G vectors are distributed among  all
1638 !        M x P processors, such that npw = npw_tot / (nproc_bands x nproc_fft). Every processor has information about
1639 !        all bands. Schematically:
1640 !
1641 !
1642 !                                        <- P = nproc_fft ->
1643 !                             ----------------------------------
1644 !                             |  n    |  n     |  n    |  n    |       ( for all band indices n)
1645 !                     ^       |  G_11 | ...    | ...   |  G_P1 |
1646 !                     |       ----------------------------------
1647 !             M = nproc_bands |  n    |  n     |  n    |  n    |
1648 !                     |       |  G_1. | ...    | ...   |  G_P. |
1649 !                     v       ----------------------------------
1650 !                             |  n    |  n     |  n    |  n    |
1651 !                             |  G_1M | ...    | ...   |  G_PM |
1652 !                             ----------------------------------
1653 !
1654 !
1655 !        the LOBPCG algorithm acts on blocks of bands. Thus, although each processor has information about all bands,
1656 !        we must conceptually view the bands grouped in blocks of size M, as {c_{1}(G),...,c_{M}(G)},
1657 !        {c_{M+1}(G)...,c_{2M}(G)}, ...
1658 !
1659 !        With this conceptual grouping, for a given block each processor has M x NG/(M x P) coefficients, where NG is the
1660 !        total number of G-vectors.
1661 !
1662 !     4) The distribution of information above is not appropriate for parallel FFTs. The data for one block is thus
1663 !        transposed and takes the form
1664 !
1665 !                                        <- P = nproc_fft ->
1666 !                             ----------------------------------
1667 !                             |  1    |  1     | ...   |  1    |
1668 !                     ^       |  G_1  |  G_2   | ...   |  G_P  |
1669 !                     |       ----------------------------------
1670 !             M = nproc_bands |  ..   | ...    | ...   |  ...  |
1671 !                     |       |  G_1  |  G_2   | ...   |  G_P  |
1672 !                     v       ----------------------------------
1673 !                             |  M    |  M     | ...   |  M    |
1674 !                             |  G_1  |  G_2   | ...   |  G_P  |
1675 !                             ----------------------------------
1676 !
1677 !     where the set of G vectors G_i = (G_{i1}, G_{i2}, ..., G_{iM}). Thus, a given processor has information about a single
1678 !     band, but more G vectors. Each processor has
1679 !                             NG/(M x P) x M = NG/P coefficients, just like before.
1680 !     Also, it is clear that the information is only communicated in the columns of the diagram above, avoiding
1681 !     "all processors to all processors" communication.
1682 !
1683 !     The data is now distributed properly to do parallel FFTs! Each row in the diagram above corresponds to FFTs done
1684 !     in parallel over nproc_fft processors, on a given band. There are M rows running thus in parallel!
1685 !
1686 !     The underlying ABINIT routines are equipped to handle FFT parallelism, not band-distributed parallelism.
1687 !     prep_getghc.F90 and lobpcgwf.F90 show how to rearrange information to be able to apply basic ABINIT routines.
1688 !
1689 !     The code below is inspired by / guessed from lobpcgwf and prep_getghc. WE ASSUME THERE ARE NO SPINORS!  CODE SHOULD
1690 !     BE HEAVILY REVIEWED for k-points and spinors, if and when they come to be useful.
1691 !
1692 !========================================================================================================================
1693 
1694 ! variables for band parallelism, following the language in lobpcgwf
1695 nspinor          = 1                                      ! how many spinors are present. Hardcoded to 1 for now.
1696 blocksize        = mpi_enreg%nproc_band*mpi_enreg%bandpp  ! how many bands treated in a block; # of FFT done in parallel in getghc
1697 nbdblock         = nband/blocksize                        ! how many blocks of bands are there
1698 ikpt_this_proc   = 1                                      ! Assuming only 1 kpoint, for molecules.
1699 ! This will have to be reviewed for crystals!
1700 
1701 npw_kb           = npw_k*blocksize
1702 npw_g            = gs_hamk2%npw_fft_k
1703 
1704 if (blocksize>1) then
1705   kg_k_gather  => bandfft_kpt(ikpt_this_proc)%kg_k_gather
1706   ffnl_gather  => bandfft_kpt(ikpt_this_proc)%ffnl_gather
1707   ph3d_gather  => bandfft_kpt(ikpt_this_proc)%ph3d_gather
1708   kinpw_gather => bandfft_kpt(ikpt_this_proc)%kinpw_gather
1709 else
1710   ffnl  = gs_hamk2%ffnl_k
1711   ph3d  = gs_hamk2%ph3d_k
1712   kg_k_gather  => kg_k
1713   ffnl_gather  => ffnl
1714   ph3d_gather  => ph3d
1715   kinpw_gather => kinpw
1716 endif
1717 call gs_hamk%load_k(kinpw_k=kinpw_gather,kg_k=kg_k_gather,ffnl_k=ffnl_gather,ph3d_k=ph3d_gather)
1718 
1719 gbound = gs_hamk%gbound_k
1720 
1721 !Set up wfk routines
1722 call set_wf(ucvol/nfft,n4,n5,n6,npw_k,blocksize,npw_g,mpi_enreg%comm_bandfft,mpi_enreg%comm_fft,mpi_enreg%comm_band)
1723 
1724 ! prepare the valence wavefunctions and projection operator to work in band+FFT parallel
1725 call DistributeValenceWavefunctions()
1726 call DistributeValenceKernel()
1727 
1728 cplx             = 2                                      ! wavefunctions have complex coefficients
1729 
1730 ! Initialize the dummy variables
1731 ABI_MALLOC(conjgrprj,(0,0))
1732 ABI_MALLOC(dummy2,(0,0))
1733 ABI_MALLOC(dummy3,(0,0,0))
1734 
1735 !Initialisation of the total counter for iterations of SQMR :
1736 ktot = 0
1737 
1738 !Allocations of working arrays for the module
1739 !- for pc_k function
1740 ABI_MALLOC(scprod2,(2,nband))
1741 
1742 !- for precondition function (and set_precondition subroutine)
1743 ABI_MALLOC(pcon,(npw_g))
1744 pcon = one
1745 
1746 !- for (private) working wf
1747 ABI_MALLOC(psik1,(2,npw_k))
1748 ABI_MALLOC(psik2,(2,npw_k))
1749 ABI_MALLOC(psik3,(2,npw_k))
1750 ABI_MALLOC(psik4,(2,npw_k))
1751 ABI_MALLOC(psikb1,(2,npw_kb))
1752 ABI_MALLOC(psikb2,(2,npw_kb))
1753 ABI_MALLOC(psikb3,(2,npw_kb))
1754 ABI_MALLOC(psikb4,(2,npw_kb))
1755 ABI_MALLOC(psig1,(2,npw_g))
1756 ABI_MALLOC(psig2,(2,npw_g))
1757 ABI_MALLOC(psig3,(2,npw_g))
1758 ABI_MALLOC(psig4,(2,npw_g))
1759 ABI_MALLOC(psir1,(2,n4,n5,n6))
1760 ABI_MALLOC(psir2,(2,n4,n5,n6))
1761 ABI_MALLOC(psir3,(2,n4,n5,n6))
1762 ABI_MALLOC(psidg,(2,nfft))
1763 ABI_MALLOC(denpot,(2*n4,n5,n6))
1764 
1765 psir1 = zero
1766 psir2 = zero
1767 psir3 = zero
1768 denpot = zero
1769 
1770 !Construct the vector of eigenvalues and write them to std output
1771 ABI_MALLOC(eig,(nband))
1772 eig=zero
1773 
1774 write(std_out,*) ch10,"Eigenvalues computation check, routine build_H:",ch10
1775 io_unit_debug = get_unit()
1776 write(filename_debug,'(A,I0.4,A)') "DEBUG_PROC=",mpi_enreg%me,".dat"
1777 
1778 
1779 
1780 mpi_communicator =  mpi_enreg%comm_bandfft
1781 
1782 open(file=filename_debug,status=files_status_old,unit=io_unit_debug)
1783 
1784 write(io_unit_debug,'(A)') " Parameters:"
1785 write(io_unit_debug,'(A,I5)') "                 nband = ", nband
1786 write(io_unit_debug,'(A,I5)') "             blocksize = ", blocksize
1787 write(io_unit_debug,'(A,I5)') "                 npw_k = ", npw_k
1788 write(io_unit_debug,'(A,I5)') "              nbdblock = ", nbdblock
1789 
1790 ! temporary wavefunction array, for data in the "linear algebra" distribution
1791 ABI_MALLOC( wfk_tmp1, (2,npw_k))
1792 ABI_MALLOC( wfk_tmp2, (2,npw_k))
1793 
1794 do n=1, nband
1795 ! Extract the n-th wavefunction
1796 wfk_tmp1(:,1:npw_k) = cg(:,(n-1)*npw_k+1:n*npw_k)
1797 
1798 ! Are the wavefunctions normalized?
1799 !tmpc = cg_zdotc(npw_k,wfk_tmp1,wfk_tmp1)
1800 !call xmpi_sum(tmpc,mpi_enreg%comm_bandfft,ierr) ! sum on all processors
1801 
1802 ! DEBUGGING CODE
1803 write(std_out,'(A,I5,A,2F24.16)') "band ", n, ", <psi_n | psi_n > =", norm_k(wfk_tmp1)
1804 write(io_unit_debug,'(A,I5,A,2F24.16)') "band ", n, ", <psi_n | psi_n > =", norm_k(wfk_tmp1)
1805 flush(io_unit_debug)
1806 end do
1807 
1808 
1809 ! loop on blocks of bands. All bands in a given block will be done in parallel!
1810 
1811 ! loop on block of bands
1812 do iblock = 1, nbdblock
1813 
1814 ! loop on states for this block
1815 do iband = 1, blocksize
1816 
1817 n = (iblock-1)*blocksize+iband
1818 
1819 psikb1(:,(iband-1)*npw_k+1:iband*npw_k)   = cg(:,(n-1)*npw_k+1:n*npw_k)
1820 
1821 end do
1822 
1823 ! change configuration of the data
1824 call wf_block_distribute(psikb1,  psig1,1) ! LA -> FFT
1825 
1826 ! Apply the Hamiltonian to the block of wavefunctions. The input psig1 is already in the "FFT"-like
1827 ! distribution (one band per row of processors, see wf_block_distribute), and the output psig2 comes
1828 ! out in the same "FFT"-like distribution.
1829 call Hpsik(psig2,psig1)
1830 
1831 ! change configuration of the data
1832 call wf_block_distribute(psikb2,  psig2,2) ! FFT -> LA
1833 
1834 ! extract the wavefunctions band by band
1835 do iband=1, blocksize
1836 
1837 n = blocksize*(iblock-1) + iband ! band index
1838 
1839 wfk_tmp1(:,1:npw_k) = psikb1(:,(iband-1)*npw_k+1:iband*npw_k)
1840 wfk_tmp2(:,1:npw_k) = psikb2(:,(iband-1)*npw_k+1:iband*npw_k)
1841 
1842 tmpc   = cg_zdotc(npw_k,wfk_tmp1,wfk_tmp2)
1843 
1844 call xmpi_sum(tmpc,mpi_enreg%comm_bandfft,ierr) ! sum on all processors
1845 eig(n) = tmpc(1)
1846 
1847 write(std_out,'(A,I5,A,F24.16,A)') "(build_H) band ", n, ", eig =", eig(n), " Ha."
1848 
1849 ! DEBUGGING CODE
1850 write(io_unit_debug,'(A,I5,A,F24.16,A)') "band ", n, ", eig =", eig(n), " Ha."
1851 flush(io_unit_debug)
1853 end do
1854 
1855 end do
1856 
1857 ABI_FREE( wfk_tmp1 )
1858 ABI_FREE( wfk_tmp2 )
1859 close(io_unit_debug)
1860 
1861 
1862 
1863 ! Finish the construction of vxc, transcribing it from the double real-space grid (for the density)
1864 ! to the single real-space grid (for the wavefunctions).
1865 ! Assumes we only need one spin component; one transcription per spin would otherwise be needed.
1866 if(allocated(vxc_dg)) then
1867   ABI_MALLOC(vxc,(n4,n5,n6,dtset%nspden))
1868   vxc = zero
1869   call fftpac(ispden,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,vxc_dg(:,ispden),vxc(:,:,:,ispden),2)
1870 end if
1871 
1872 timrev = 1 !Assumes istwfk *1 (i.e. istwfk=1 for all k-points). 1: time-reversal symmetry NOT present | 2: present
1873 ABI_MALLOC(title,(dtset%ntypat))
1874 do i=1,dtset%ntypat
1875 title(i) = "Bloup" ! The clean way would be to get the psps structure in this module
1876 ! (build_vxc is called from a place in GS calculations where it is available;
1877 ! should be the easiest way). For now, this allows the code to run.
1878 end do
1879 call crystal_init(dtset%amu_orig(:,1),Cryst,dtset%spgroup,dtset%natom,dtset%npsp,&
1880 &                 dtset%ntypat,dtset%nsym,dtset%rprimd_orig(:,:,1),dtset%typat,&
1881 &                 dtset%xred_orig(:,:,1),dtset%ziontypat,dtset%znucl,timrev,.false.,.false.,title,&
1882 &                 dtset%symrel,dtset%tnons,dtset%symafm)
1883 ABI_FREE(title)
1884 call Cryst%print()
1885 
1886 !TODO : Should be put in a separate build_vc constructor, and should be called right after build_H in the context of optdriver 66.
1887 if(dtset%optdriver==66) then
1888 
1889   !Set up of the k-points and tables in the whole BZ
1890   call Kmesh%init(Cryst,dtset%nkpt,dtset%kptns,dtset%kptopt,wrap_1zone=.false.)
1891   call Kmesh%print("K-mesh for the wavefunctions",std_out)
1892   call find_qmesh(Qmesh,Cryst,Kmesh)
1893   call Qmesh%print("Q-mesh for the screening function",std_out)
1894 
1895 
1896   !------------------------------
1897   !Building the vc_sqrt structure
1898   !------------------------------
1899   ! The sqrt_vc code relies on a different parallelism scheme than Vanilla ABINIT.
1900   ! The Gsphere object must be built on a single processor having all the G-vectors
1901   ! available.
1902 
1903   !gsph_init needs gvec built according to the KSS convention, so kg_k is not suitable (not properly sorted).
1904   npw_serial=npw_k
1905   call xmpi_sum(npw_serial,mpi_enreg%comm_bandfft,ierr)
1906 
1907   ecut_eff = dtset%ecut*(dtset%dilatmx)**2
1908   call make_gvec_kss(dtset%nkpt,dtset%kptns,ecut_eff,dtset%symmorphi,dtset%nsym,dtset%symrel,dtset%tnons,Cryst%gprimd,&
1909   &                      dtset%prtvol,npw_serial,gvec,ierr)
1910 
1911   call Gsphere%init(Cryst,npw_serial,gvec=gvec)
1912 
1913   call Gsphere%print()
1914 
1915   call Vcp%init(Gsphere,Cryst,Qmesh,Kmesh,dtset%rcut,dtset%gw_icutcoul,dtset%vcutgeo,dtset%ecutsigx,npw_serial,&
1916                 dtset%nkpt,dtset%kptns,mpi_enreg%comm_world)
1917 
1918   ! Since Vcp%vc_sqrt is sorted according to the KSS convention for G vectors,
1919   ! BUT will be applied to GS wavefunctions (where G vectors are sorted otherwise),
1920   ! it is necessary to construct a vc_sqrt vector with the GS sorting.
1921   ! Moreover, if we are in parallel (over bands / FFTs), Vcp%vc_sqrt is the serial version
1922   ! and needs to be distributed according to the GS scheme (Linear Algebra configuration).
1923   ABI_MALLOC(vc_sqrt,(npw_k))
1924   vc_sqrt=zero
1925   k=0
1926   do i=1,npw_k
1927   do j=1,npw_serial
1928   if(all(kg_k(:,i)==gvec(:,j))) k=j
1929   end do
1930   vc_sqrt(i)=Vcp%vc_sqrt(k,1)
1931   end do
1932 
1933 end if
1934 
1935 !--------------------------------------------------------------------------------
1936 ! Security check : the eigenstates of the projector and the Hamiltonian need to
1937 ! agree down to the precision requested (tolwfr). Otherwise, SQMR is doomed.
1938 ! Now that the density is read and the eigenstates are calculated in the GW run,
1939 ! this check is less useful; a test on tolwfr could be sufficient.
1940 !
1941 ! This remains a good tool to debug band+fft parallelism
1942 !--------------------------------------------------------------------------------
1943 
1944 ! only write on the head node!
1945 if (mpi_enreg%me == 0) then
1946   io_unit = get_unit()
1947   open(file='build_H.log',status=files_status_old,unit=io_unit)
1948   write(io_unit,10) '#----------------------------------------------------------------------------'
1949   write(io_unit,10) '#'
1950   write(io_unit,10) '#               This file presents the results of a small test to check      '
1951   write(io_unit,10) '#               how well the Hamiltonian commutes with the projection        '
1952   write(io_unit,10) '#               operator.                                                    '
1953   write(io_unit,10) '#'
1954   write(io_unit,10) '# Definitions:'
1955   write(io_unit,10) '#'
1956   write(io_unit,10) '#                    P     : projections on conduction states'
1957   write(io_unit,10) '#                    H     : Hamiltonian operator'
1958   write(io_unit,10) '#                  | psi > : eigenstate'
1959   write(io_unit,10) '#'
1960   flush(io_unit)
1961 end if
1962 
1963 psikb1 = zero
1964 
1965 ! sum all valence states, one copy in every band block.
1966 do i=1,nbandv
1967 
1968 do mb = 1, blocksize
1969 
1970 psikb1(:,(mb-1)*npw_k+1:mb*npw_k) = psikb1(:,(mb-1)*npw_k+1:mb*npw_k)  + cg(:,(i-1)*npw_k+1:i*npw_k)
1971 
1972 end do
1973 
1974 end do
1975 
1976 ! normalize to one! Only sum the first band block
1977 tmpc = cg_zdotc(npw_k,psikb1,psikb1)
1978 call xmpi_sum(tmpc,mpi_communicator ,ierr) ! sum on all processors
1979 psikb1 = psikb1/sqrt(tmpc(1))
1980 
1981 
1982 ! change data distribution
1983 call wf_block_distribute(psikb1,  psig1,1) ! LA -> FFT
1984 
1985 ! Apply P.H operator
1986 call Hpsik(psig2 ,psig1)
1987 call pc_k_valence_kernel(psig2)
1988 
1989 ! Apply H.P operator
1990 call pc_k_valence_kernel(psig1)
1991 call Hpsik(psig1)
1992 
1993 ! compute error
1994 psig3 = psig1 - psig2 ! explicitly write the difference in an array
1995 tmpc   = cg_zdotc(npw_g,psig3,psig3)
1996 commutation_error = tmpc(1)
1997 
1998 call xmpi_sum(commutation_error , mpi_enreg%comm_fft, ierr) ! sum on all processors working on FFT!
1999 
2000 ! only write on the head node!
2001 if (mpi_enreg%me == 0) then
2002   write(io_unit,20) '   || (PH -HP) |b> ||^2 =  ',commutation_error
2003   write(io_unit,20) '           tolwfr       =  ',tolwfr
2004 end if
2005 
2006 if(commutation_error > tolwfr) then
2007   !write(io_unit,10) '# || (PH -HP) |b> ||^2 > tolwfr ==> Decision taken exit!'
2008 
2009   if (mpi_enreg%me == 0) write(io_unit,10) '# || (PH -HP) |b> ||^2 > tolwfr ==> This must be fixed!'
2010 
2011   write(std_out,20) "WARNING-build_H: The tolerance tolwfr=",tolwfr
2012   write(std_out,20) "                 is smaller than the commutation error ||(PH-HP)|b>||^2=",commutation_error
2013   write(std_out,10) "                 Either set tolwfr to a less stringent value in this calculation"
2014   write(std_out,10) "                 or to a more stringent value in the wavefunction calculation."
2015 end if
2016 
2017 if (mpi_enreg%me == 0) close(io_unit)
2018 
2019 10 format(A)
2020 20 format(A,ES12.3)
2021 end subroutine build_H
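
The block-by-block pattern used above (pack a block of bands in the "linear algebra" layout,
transpose it to the "FFT" layout, apply the operator, transpose back) is the idiom used
throughout this module. A minimal, hedged sketch of the idiom, assuming build_H has already been
called so that the module arrays (cg, psikb1, psikb2, psig1, psig2) and dimensions (npw_k, npw_g,
blocksize, nbdblock) are set up; this is illustrative, not a verbatim excerpt:

  integer :: iblock, iband, n

  do iblock = 1, nbdblock
    ! pack blocksize bands in the "linear algebra" distribution
    do iband = 1, blocksize
      n = (iblock-1)*blocksize + iband
      psikb1(:,(iband-1)*npw_k+1:iband*npw_k) = cg(:,(n-1)*npw_k+1:n*npw_k)
    end do
    call wf_block_distribute(psikb1, psig1, 1)  ! LA  -> FFT
    call Hpsik(psig2, psig1)                    ! psig2 = H |psig1>, in the FFT layout
    call wf_block_distribute(psikb2, psig2, 2)  ! FFT -> LA
    ! ... extract bands from psikb2, as done in build_H above ...
  end do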

m_gwls_hamiltonian/build_vxc [ Functions ]


NAME

  build_vxc

FUNCTION

  Stores a copy of the exchange-correlation potential (on the double real-space grid) in the
  module variable vxc_dg. The transcription to the single grid is done later, in build_H.

INPUTS

OUTPUT

SOURCE

1335 subroutine build_vxc(vxc2,nfft2,nspden2)
1336 !Only transcribe the argument vxc2 in the module; the change from dg to sg is done in build_H (and stored in vxc), since the
1337 !arguments of fftpac are built in build_H.
1338 
1339 !We need the dimensions of vxc as arguments since they don't exist yet in the module: build_vxc is called before build_H.
1340 integer, intent(in) :: nfft2, nspden2
1341 real(dp), intent(in) :: vxc2(nfft2,nspden2)
1342 
1343 ! *************************************************************************
1344 
1345 ABI_MALLOC(vxc_dg,(nfft2,nspden2))
1346 vxc_dg = vxc2
1347 
1348 end subroutine build_vxc
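
Since build_vxc only stores vxc on the double grid and build_H performs the fftpac transcription,
build_vxc must be called before build_H. A hedged sketch of the expected calling order (the
variable names here are illustrative, not taken from the source):

  call build_vxc(vxc_from_scf, nfft, nspden)   ! store vxc on the double real-space grid
  call build_H(dtset, mpi_enreg, cpopt, cg, gs_hamk, kg_k, kinpw)
  ! ... GWLS calculations using the module operators ...
  call destroy_H()                             ! free all module state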

m_gwls_hamiltonian/destroy_H [ Functions ]


NAME

  destroy_H

FUNCTION

  Frees all the module-level data structures and work arrays allocated by build_H and build_vxc.

INPUTS

OUTPUT

SOURCE

1364 subroutine destroy_H
1365 
1366 call dtset%free()
1367 call gs_hamk%free()
1368 
1369 call cryst%free()
1370 call Kmesh%free()
1371 call Qmesh%free()
1372 call Gsphere%free()
1373 call Vcp%free()
1374 
1375 call bandfft_kpt_destroy_array(bandfft_kpt,mpi_enreg)
1376 call destroy_mpi_enreg(mpi_enreg)
1377 
1378 !NOTE : the one-line syntax if(allocated(a)) ABI_FREE(a) results in an error if "a" is not allocated: the macro expands
1379 !ABI_FREE into more than one line of text, so the second line onward falls outside the if. The if() then construct is required.
1380 if(allocated(cg)) then
1381   ABI_FREE(cg)
1382 end if
1383 if(allocated(gbound)) then
1384   ABI_FREE(gbound)
1385 end if
1386 if(allocated(kg_k)) then
1387   ABI_FREE(kg_k)
1388 end if
1389 if(allocated(ffnl)) then
1390   ABI_FREE(ffnl)
1391 end if
1392 if(allocated(ph3d)) then
1393   ABI_FREE(ph3d)
1394 end if
1395 if(allocated(kinpw)) then
1396   ABI_FREE(kinpw)
1397 end if
1398 if(allocated(vxc)) then
1399   ABI_FREE(vxc)
1400 end if
1401 if(allocated(vlocal)) then
1402   ABI_FREE(vlocal)
1403 end if
1404 if(allocated(conjgrprj)) then
1405   ABI_FREE(conjgrprj)
1406 end if
1407 if(allocated(istwfk)) then
1408   ABI_FREE(istwfk)
1409 end if
1410 if(allocated(dummy2)) then
1411   ABI_FREE(dummy2)
1412 end if
1413 if(allocated(dummy3)) then
1414   ABI_FREE(dummy3)
1415 end if
1416 if(allocated(eig)) then
1417   ABI_FREE(eig)
1418 end if
1419 if(allocated(scprod2)) then
1420   ABI_FREE(scprod2)
1421 end if
1422 if(allocated(pcon)) then
1423   ABI_FREE(pcon)
1424 end if
1425 if(allocated(psik1)) then
1426   ABI_FREE(psik1)
1427 end if
1428 if(allocated(psik2)) then
1429   ABI_FREE(psik2)
1430 end if
1431 if(allocated(psik3)) then
1432   ABI_FREE(psik3)
1433 end if
1434 if(allocated(psik4)) then
1435   ABI_FREE(psik4)
1436 end if
1437 if(allocated(psikb1)) then
1438   ABI_FREE(psikb1)
1439 end if
1440 if(allocated(psikb2)) then
1441   ABI_FREE(psikb2)
1442 end if
1443 if(allocated(psikb3)) then
1444   ABI_FREE(psikb3)
1445 end if
1446 if(allocated(psikb4)) then
1447   ABI_FREE(psikb4)
1448 end if
1449 if(allocated(psig1)) then
1450   ABI_FREE(psig1)
1451 end if
1452 if(allocated(psig2)) then
1453   ABI_FREE(psig2)
1454 end if
1455 if(allocated(psig3)) then
1456   ABI_FREE(psig3)
1457 end if
1458 if(allocated(psig4)) then
1459   ABI_FREE(psig4)
1460 end if
1461 if(allocated(psir1)) then
1462   ABI_FREE(psir1)
1463 end if
1464 if(allocated(psir2)) then
1465   ABI_FREE(psir2)
1466 end if
1467 if(allocated(psir3)) then
1468   ABI_FREE(psir3)
1469 end if
1470 if(allocated(psidg)) then
1471   ABI_FREE(psidg)
1472 end if
1473 if(allocated(vxc_dg)) then
1474   ABI_FREE(vxc_dg)
1475 end if
1476 if(allocated(denpot)) then
1477   ABI_FREE(denpot)
1478 end if
1479 if(allocated(kernel_wavefunctions_FFT)) then
1480   ABI_FREE(kernel_wavefunctions_FFT)
1481 end if
1482 if(allocated(valence_wavefunctions_FFT)) then
1483   ABI_FREE(valence_wavefunctions_FFT)
1484 end if
1485 if(associated(gvec)) then
1486   ABI_FREE(gvec)
1487 end if
1488 if(allocated(vc_sqrt)) then
1489   ABI_FREE(vc_sqrt)
1490 end if
1491 
1492 end subroutine destroy_H

m_gwls_hamiltonian/dft_xc_energy [ Functions ]


NAME

  dft_xc_energy

FUNCTION

  Computes the DFT exchange-correlation energy of state |e>, namely <psi_e|v_xc|psi_e>, by
  applying the real-space potential vxc with fourwf.

INPUTS

OUTPUT

SOURCE

695 function dft_xc_energy(e)
696 
697 real(dp) :: dft_xc_energy
698 integer, intent(in) :: e
699 
700 
701 integer :: cplex, option, ierr
702 real(dp), allocatable :: psik_e(:,:)             !Working array to store the wavefunction
703 real(dp), allocatable :: psik_e_alltoall(:,:)    !Working array to store the wavefunction
704 
705 real(dp), allocatable :: psik_out(:,:)  !Working array to store the wavefunction
706 
707 real(dp) :: tmpc(2)
708 integer  :: mpi_communicator
709 real(dp) :: dft_xc_energy_tmp
710 
711 ! *************************************************************************
712 
713 !--------------------------------------------------------------------------------
714 ! The goal of this routine is to compute the xc energy using
715 ! the DFT Vxc array.
716 !--------------------------------------------------------------------------------
717 
718 ! Parallel-MPI code. This is inspired by code within the getghc routine, which
719 ! applies a complex potential to a wavefunction.
720 
721 
722 cplex  = 1 ! real potential
723 option = 2 ! multiply wavefunction by potential
724 nspinor= 1
725 ! Allocate wavefunction arrays which contains the coefficients of the e-state
726 ABI_MALLOC(psik_e,         (2,npw_kb))
727 ABI_MALLOC(psik_e_alltoall,(2,npw_g))
728 ABI_MALLOC(psik_out,       (2,npw_g))
729 
730 
731 ! Only fetch the e-state, setting other states in block to zero!
732 psik_e(:,:)       = zero
733 psik_e(:,1:npw_k) = cg(:,(e-1)*npw_k+1:e*npw_k)
734 psik_out(:,:)     = zero
735 
736 
737 ! change the configuration of the data
738 call wf_block_distribute(psik_e,  psik_e_alltoall,1) ! LA -> FFT
739 
740 
741 ! Call fourwf to generate the product, in k-space
742 ! Computation:
743 !                       psik_e_alltoall(k) -> psi(r)
744 !                       res(r)   =  (vxc(r) x psi(r))
745 !                       psik_out(k) <- res(r)
746 !  psir3 is a dummy, not used here.
747 
748 call fourwf(cplex,vxc(:,:,:,ispden),psik_e_alltoall,psik_out,psir3,gbound,gbound,istwfk(ckpt),kg_k_gather,kg_k_gather,mgfft,&
749 &             mpi_enreg,1,ngfft,npw_g,npw_g,n4,n5,n6,option,tim_fourwf,weight,weight)
750 
751 
752 tmpc = cg_zdotc(npw_g, psik_e_alltoall,psik_out)
753 
754 mpi_communicator =  mpi_enreg%comm_fft
755 call xmpi_sum(tmpc,mpi_communicator, ierr) ! sum on all processors working on FFT!
756 
757 dft_xc_energy_tmp = tmpc(1)
758 
759 mpi_communicator =  mpi_enreg%comm_band
760 call xmpi_sum(dft_xc_energy_tmp,mpi_communicator, ierr) ! sum over the band communicator
761 
762 dft_xc_energy = dft_xc_energy_tmp
763 
764 ABI_FREE(psik_e)
765 ABI_FREE(psik_e_alltoall)
766 ABI_FREE(psik_out)
767 
768 end function dft_xc_energy
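
In bra-ket terms, the function returns <psi_e|v_xc|psi_e>: the state is taken to real space,
multiplied by vxc(r) through fourwf (option 2), brought back to reciprocal space, and contracted
with the original state. A hedged usage sketch, assuming build_vxc and build_H have been called:

  real(dp) :: exc_n
  integer  :: n
  n = 1                        ! any band index <= nband
  exc_n = dft_xc_energy(n)     ! xc expectation value of band n, in Hartree
  write(std_out,'(A,I5,A,F20.12,A)') ' band ', n, ': <psi|vxc|psi> = ', exc_n, ' Ha'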

m_gwls_hamiltonian/DistributeValenceKernel [ Functions ]


NAME

  DistributeValenceKernel

FUNCTION

  Distributes, once and for all, all the valence (kernel) states to every FFT group, for use by
  the projection operator of the static SQMR solver.

INPUTS

OUTPUT

SOURCE

276 subroutine DistributeValenceKernel()
277 !--------------------------------------------------------------------------------
278 !
279 ! This subroutine distributes, once and for all, the kernel of the static
280 ! SQMR operator.
281 !
282 ! In this first, quick and dirty implementation, we simply distribute
283 ! ALL the valence bands on ALL the FFT groups. This is not efficient, but
284 ! kernel projections are not a bottleneck of the computation.
285 !
286 ! A better (forthcoming) algorithm would only distribute the actual kernel,
287 ! not all valence bands.
288 !--------------------------------------------------------------------------------
289 
290 integer  :: mb, n
291 
292 real(dp), allocatable :: psik_n(:,:)             !wavefunctions in LA format
293 real(dp), allocatable :: psik_n_alltoall(:,:)    !wavefunctions in FFT format
294 
295 ! *************************************************************************
296 
297 
298 !===================================================================
299 ! Allocate the global array which will contain the valence states
300 !===================================================================
301 ABI_MALLOC( kernel_wavefunctions_FFT, (2,npw_g, nband))
302 
303 kernel_wavefunctions_FFT = zero
304 
305 !====================================================
306 ! Allocate working arrays
307 !====================================================
308 
309 ABI_MALLOC(psik_n,         (2,npw_kb))
310 ABI_MALLOC(psik_n_alltoall,(2,npw_g))
311 
312 
313 ! loop on all valence states
314 do n = 1, nband
315 
316 ! Copy multiple instances of this valence state in the array;
317 ! this way, each FFT group will have ALL the valence states!
318 do mb = 1, blocksize
319 psik_n(:,(mb-1)*npw_k+1:mb*npw_k)   = cg(:,(n-1)*npw_k+1:n*npw_k)
320 end do
321 
322 ! change configuration of the data
323 call wf_block_distribute(psik_n,  psik_n_alltoall,1) ! LA -> FFT
324 
325 ! copy data in global array
326 kernel_wavefunctions_FFT(:,:,n) = psik_n_alltoall(:,:)
327 
328 end do
329 
330 !====================================================
331 ! cleanup
332 !====================================================
333 
334 ABI_FREE(psik_n)
335 ABI_FREE(psik_n_alltoall)
336 
337 end subroutine DistributeValenceKernel

m_gwls_hamiltonian/DistributeValenceWavefunctions [ Functions ]


NAME

  DistributeValenceWavefunctions

FUNCTION

  Distributes, once and for all, the valence wavefunctions to the FFT configuration, ready to be
  used within the susceptibility operator.

INPUTS

OUTPUT

SOURCE

193 subroutine DistributeValenceWavefunctions()
194 !--------------------------------------------------------------------------------
195 !
196 ! This subroutine distributes, once and for all, the valence wavefunctions to
197 ! the FFT configuration. They will thus be ready to be used within the
198 ! susceptibility operator.
199 !
200 !--------------------------------------------------------------------------------
201 integer  :: iblk, mb, v
202 
203 real(dp), allocatable :: psik_v(:,:)             !wavefunctions in LA format
204 real(dp), allocatable :: psik_v_alltoall(:,:)    !wavefunctions in FFT format
205 
206 ! *************************************************************************
207 
208 
209 
210 !===================================================================
211 ! Allocate the global array which will contain the valence states
212 !===================================================================
213 ABI_MALLOC( valence_wavefunctions_FFT, (2,npw_g,nbdblock))
214 
215 valence_wavefunctions_FFT = zero
216 
217 
218 !====================================================
219 ! Allocate working arrays
220 !====================================================
221 
222 ABI_MALLOC(psik_v,         (2,npw_kb))
223 ABI_MALLOC(psik_v_alltoall,(2,npw_g))
224 
225 
226 
227 ! loop on all blocks of states,
228 do iblk = 1, nbdblock
229 
230 ! loop on valence states for this block; if the state is conduction, fill with zeros
231 do mb = 1, blocksize
232 
233 v = (iblk-1)*blocksize+mb
234 
235 if (v <= nbandv) then
236   psik_v(:,(mb-1)*npw_k+1:mb*npw_k)   = cg(:,(v-1)*npw_k+1:v*npw_k)
237 else
238   psik_v(:,(mb-1)*npw_k+1:mb*npw_k)   = zero
239 end if
240 
241 end do
242 
243 ! change configuration of the data
244 call wf_block_distribute(psik_v,  psik_v_alltoall,1) ! LA -> FFT
245 
246 ! copy data in global array
247 
248 valence_wavefunctions_FFT(:,:,iblk) = psik_v_alltoall(:,:)
249 
250 end do
251 
252 !====================================================
253 ! cleanup
254 !====================================================
255 
256 ABI_FREE(psik_v)
257 ABI_FREE(psik_v_alltoall)
258 
259 
260 end subroutine DistributeValenceWavefunctions

m_gwls_hamiltonian/exchange [ Functions ]


NAME

  exchange

FUNCTION

  Computes the (negative) Fock exchange energy of state |e> in band+FFT parallel, using the
  square root of the (possibly truncated) Coulomb potential; optionally projects the result on a
  Lanczos basis.

INPUTS

OUTPUT

SOURCE

558 function exchange(e, Lbasis_lanczos)
559 
560 !use m_bandfft_kpt
561 use m_cgtools
562 !================================================================================
563 ! This function computes the exchange energy in band+FFT parallel
564 !
565 !================================================================================
566 real(dp) :: exchange
567 
568 integer, intent(in) :: e
569 
570 ! If this argument is provided, the exchange energy is projected on this subspace
571 complex(dpc), optional, intent(in) :: Lbasis_lanczos(:,:)  ! complex array which contains the Lanczos basis
572 
573 real(dp), allocatable :: psik_e(:,:)             !Working array to store the wavefunction
574 
575 real(dp), allocatable :: psik_v(:,:)             !Working array to store the wavefunction
576 
577 real(dp), allocatable :: psik_out(:,:)           !Working array to store the wavefunction
578 
579 
580 integer :: iblk, mb
581 
582 integer :: l, lmax
583 
584 real(dp) :: tmpc(2)
585 
586 logical  :: project
587 
588 ! *************************************************************************
589 
590 !--------------------------------------------------------------------------------
591 ! Determine if the exchange energy must be projected
592 ! on the Lanczos basis
593 !--------------------------------------------------------------------------------
594 
595 project = .false.
596 if (present(Lbasis_lanczos)) then
597   project = .true.
598   lmax = size(Lbasis_lanczos, 2)
599 end if
600 
601 !--------------------------------------------------------------------------------
602 ! The goal of this routine is to compute the exchange energy using
603 ! a truncated Coulomb potential
604 !--------------------------------------------------------------------------------
605 
606 exchange = 0.0_dp
607 
608 !====================================================
609 ! build the block of wavefunctions which will
610 ! contain copies the e-state
611 !====================================================
612 
613 ABI_MALLOC(psik_e,(2,npw_kb))
614 
615 ! fill psik_e with as many copies of the e-state as there are band processors;
616 ! that way, upon LA -> FFT, each row of fft processors will have the e-state
617 do v = 1, blocksize
618 psik_e(:,(v-1)*npw_k+1:v*npw_k)   = cg(:,(e-1)*npw_k+1:e*npw_k)
619 end do
620 
621 !====================================================
622 ! Allocate the block of wavefunctions which will
623 ! contain the valence states
624 !====================================================
625 
626 ABI_MALLOC(psik_v,         (2,npw_kb))
627 ABI_MALLOC(psik_out,         (2,npw_kb))
628 
629 ! loop on all blocks of states,
630 do iblk = 1, nbdblock
631 
632 ! loop on valence states for this block; if the state is conduction, fill with zeros
633 do mb = 1, blocksize
634 
635 v = (iblk-1)*blocksize+mb
636 
637 if (v <= nbandv) then
638   psik_v(:,(mb-1)*npw_k+1:mb*npw_k)   = cg(:,(v-1)*npw_k+1:v*npw_k)
639 else
640   psik_v(:,(mb-1)*npw_k+1:mb*npw_k)   = zero
641 end if
642 
643 end do
644 
645 call kbkb_to_kb(psik_out,psik_v,psik_e)
646 
647 ! apply Coulomb potential, and take norm: accumulate the exchange energy
648 do mb = 1, blocksize
649 
650 psik1 = psik_out(:,(mb-1)*npw_k+1:mb*npw_k)
651 call sqrt_vc_k(psik1)
652 
653 if (project) then
654   ! project on the Lanczos basis
655   do l = 1, lmax
656   psik2(1,:) = dble(Lbasis_lanczos(:,l))
657   psik2(2,:) = dimag(Lbasis_lanczos(:,l))
658   tmpc = scprod_k(psik2,psik1)
659   exchange = exchange - (tmpc(1)**2+tmpc(2)**2)
660   end do
661 else
662   ! compute the "exact" exchange energy
663   exchange = exchange - norm_k(psik1)**2
664 end if
665 
666 end do
667 
668 end do
669 
670 ABI_FREE(psik_e)
671 
672 ABI_FREE(psik_v)
673 
674 ABI_FREE(psik_out)
675 
676 end function exchange
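
In formulas, the loops above accumulate the (negative) Fock exchange energy of state e,

  exchange = - sum_{v occupied} || vc^(1/2) (psi_v^* psi_e) ||^2 ,

or, when a Lanczos basis {l} is supplied, its projection - sum_{v,l} |<l| vc^(1/2) (psi_v^* psi_e)>|^2.
A hedged usage sketch (e and Lbasis_lanczos follow the interface above):

  real(dp) :: sigma_x
  sigma_x = exchange(e)                   ! "exact" exchange energy of band e
  sigma_x = exchange(e, Lbasis_lanczos)   ! same, projected on the Lanczos basis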

m_gwls_hamiltonian/g_to_r [ Functions ]


NAME

  g_to_r

FUNCTION

  Fourier transforms a wavefunction from reciprocal space (FFT configuration) to the real-space
  FFT box, with 1/sqrt(ucvol) normalization.

INPUTS

OUTPUT

SOURCE

1175 subroutine g_to_r(psi_out,psi_in)
1176 
1177 real(dp), intent(out) :: psi_out(2,n4,n5,n6)
1178 real(dp), intent(in)  :: psi_in(2,npw_g)
1179 integer :: option, cplex
1180 integer :: nproc_fft, me_fft, nd3 !, ierr
1181 
1182 ! *************************************************************************
1183 
1184 option = 0 ! fft wavefunction to real space
1185 cplex  = 2 ! complex potential
1186 
1187 psig4 = psi_in
1188 psi_out = zero
1189 call fourwf(cplex,dummy3,psig4,dummy2,psi_out,gbound,gbound,istwfk(ckpt),kg_k_gather,kg_k_gather,mgfft,mpi_enreg, &
1190 1,ngfft,npw_g,npw_g,n4,n5,n6,option,tim_fourwf,weight,weight)
1191 psi_out = psi_out/sqrt(ucvol)
1192 
1193 !! This comes from prep_fourwf
1194 !nproc_fft=mpi_enreg%nproc_fft
1195 !if (nproc_fft>1) then
1196 !  me_fft=mpi_enreg%me_fft
1197 !  if (me_fft>0) then
1198 !    nd3=(ngfft(3)-1)/nproc_fft+1
1199 !    psi_out(:,:,:,me_fft*nd3+1:me_fft*nd3+nd3)=psi_out(:,:,:,1:nd3)
1200 !    psi_out(:,:,:,1:nd3)=zero
1201 !  end if
1202 !  call xmpi_sum(psi_out,mpi_enreg%comm_fft,ierr)
1203 !end if
1204 
1205 !Instead of communicating the whole real-space vector to all comm_fft CPUs,
1206 !it is possible to let each CPU keep only its part and communicate only the sums.
1207 !Then, however, we need to clean out the part of the real-space
1208 !vector that is not attributed to the given comm_fft CPU.
1209 nproc_fft=mpi_enreg%nproc_fft
1210 if (nproc_fft>1) then
1211   me_fft=mpi_enreg%me_fft
1212   if (me_fft>0) then
1213     nd3=(ngfft(3)-1)/nproc_fft+1
1214     psi_out(:,:,:,me_fft*nd3+1:me_fft*nd3+nd3)=psi_out(:,:,:,1:nd3)
1215     psi_out(:,:,:,1:nd3)=zero
1216   end if
1217   !!!  call xmpi_sum(psi_out,mpi_enreg%comm_fft,ierr)
1218 end if
1219 
1220 end subroutine g_to_r

m_gwls_hamiltonian/gr_to_g [ Functions ]


NAME

  gr_to_g

FUNCTION

  Multiplies a real-space function with a reciprocal-space wavefunction and returns the product
  in reciprocal space (fourwf with option 2).

INPUTS

OUTPUT

SOURCE

1236 subroutine gr_to_g(psig_out,psir_in,psig_in)
1237 
1238 real(dp), intent(in)  :: psir_in(2,n4,n5,n6)
1239 real(dp), intent(in), optional :: psig_in(2,npw_g)
1240 real(dp), intent(out) :: psig_out(2,npw_g)
1241 
1242 integer :: i1, i2, i3
1243 integer :: cplex, option
1244 
1245 ! *************************************************************************
1246 
1247 cplex = 2 ! complex potential
1248 option= 2 ! multiply wavefunction by potential
1249 
1250 if(.not. present(psig_in)) then
1251   psig4(:,:) = psig_out(:,:)
1252 else
1253   psig4(:,:) = psig_in(:,:)
1254 end if
1255 
1256 do i3=1,n6
1257 do i2=1,n5
1258 do i1=1,n4
1259 denpot(2*i1-1,i2,i3)= psir_in(1,i1,i2,i3)
1260 denpot(2*i1  ,i2,i3)= psir_in(2,i1,i2,i3)
1261 end do
1262 end do
1263 end do
1264 
1265 call fourwf(          cplex, & ! complex potential
1266 denpot, & ! real space wavefunction, in denpot format
1267 psig4, & ! fourier space wavefunction
1268 psig_out, & ! result, in FFT configuration
1269 psir3,gbound,gbound,istwfk(ckpt),kg_k_gather,kg_k_gather,mgfft,mpi_enreg,1, & ! Various other arguments
1270 ngfft,npw_g,npw_g,n4,n5,n6,option,tim_fourwf,weight,weight)
1271 
1272 end subroutine gr_to_g

m_gwls_hamiltonian/Hpsik [ Functions ]


NAME

  Hpsik

FUNCTION

  Applies the Hamiltonian (through getghc) to a wavefunction in the FFT configuration, in place
  or out of place; optionally returns (H - cte)|psi> instead of H|psi>.

INPUTS

OUTPUT

SOURCE

1000 subroutine Hpsik(psi_out,psi_in,cte)
1001 
1002 !External variables
1003 real(dp), intent(inout) :: psi_out(2,npw_g)
1004 real(dp), intent(inout), optional :: psi_in(2,npw_g)
1005 real(dp), intent(in), optional :: cte
1006 
1007 ! *************************************************************************
1008 
1009 !psikb1 is allocated in build_H and destroyed in destroy_H, to avoid doing it at each application of
1010 !the Hamiltonian...
1011 
1012 !We are keeping track of how the module treats each argument of the Hamiltonian (subroutine getghc) here.
1013 !Legend : T = transmitted from 79_seqpar_mpi/vtowfk.F90 through the call gwls_sternheimer
1014 !         I = input argument (allocated and filled)
1015 !         O = output argument (allocated)
1016 !         D = dummy argument (declared but not allocated. Read/Write attempts should trigger a segfault.)
1017 !call getghc(cpopt,             T
1018 !psi,              I
1019 !conjgrprj,        D
1020 !Hpsik,            O
1021 !gsc,              D              (output for PAW : <G|S|C>)
1022 !gs_hamk,          T
1023 !gvnlxc,            D              (<G|Vnonlocal+VFockACE|C>)
1024 !eshift,           I              (<G|H-eshift.S|C>)
1025 !mpi_enreg,        T
1026 !ndat,             Fixed to 1     (# of FFTs to do in //)
1027 !dtset%prtvol,     T
1028 !sij_opt,          Fixed to 0     (PAW dependant : 0-><G|H|C> ; 1-><G|H|C> & <G|S|C> ; -1-><G|H-cte.S|C>)
1029 !tim_getghc,       Fixed to 0     (identity of the timer of the calling subroutine. 1:cgwf 5:lobpcg)
1030 !type_calc)        Fixed to 0     (0:whole H N:some part only)
1031 
1032 ! It is necessary to pass this optional argument to getghc if paral_kgb=1, or else the code exits cleanly with a "BUG" message...
1033 ! It will be important to properly understand what this means when we start using k-points...
1034 
1035 if(present(psi_in)) then
1036 
1037   call getghc(cpopt,psi_in,conjgrprj,psi_out,dummy2,gs_hamk,psig4,eshift,&
1038 &             mpi_enreg,ndat,dtset%prtvol,sij_opt,tim_getghc,type_calc)
1039 
1040 else
1041   psig3 = psi_out
1042 
1043   call getghc(cpopt,psig3,conjgrprj,psi_out,dummy2,gs_hamk,psig4,eshift,&
1044 &             mpi_enreg,ndat,dtset%prtvol,sij_opt,tim_getghc,type_calc)
1045 
1046 end if
1047 if(present(cte)) then
1048   if(present(psi_in)) then
1049     psi_out = psi_out - cte*psi_in
1050   else
1051     psi_out = psi_out - cte*psig3
1052   end if
1053 end if
1054 
1055 end subroutine Hpsik
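
Given the optional arguments, Hpsik supports three call patterns (a hedged summary; all arrays
are in the FFT distribution, and lambda stands for any real(dp) shift):

  call Hpsik(psig2, psig1)          ! psig2 = H |psig1>
  call Hpsik(psig1)                 ! in place: psig1 <- H |psig1>
  call Hpsik(psig2, psig1, lambda)  ! psig2 = (H - lambda) |psig1>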

m_gwls_hamiltonian/Hpsikc [ Functions ]


NAME

  Hpsikc

FUNCTION

  Complex-valued wrapper around Hpsik: applies H (optionally H - cte) to a wavefunction stored as
  a complex array.

INPUTS

OUTPUT

SOURCE

1071 subroutine Hpsikc(psi_out,psi_in,cte)
1072 
1073 !External variables
1074 complex(dpc), intent(out) :: psi_out(npw_g)
1075 complex(dpc), intent(in)  :: psi_in(npw_g)
1076 complex(dpc), intent(in), optional :: cte
1077 
1078 ! *************************************************************************
1079 
1080 
1081 psig1(1,:) = dble(psi_in)
1082 psig1(2,:) = dimag(psi_in)
1083 
1084 call Hpsik(psig1)
1085 
1086 psi_out = dcmplx(psig1(1,:),psig1(2,:))
1087 
1088 if(present(cte)) psi_out = psi_out - cte*psi_in
1089 end subroutine Hpsikc

m_gwls_hamiltonian/kbkb_to_kb [ Functions ]


NAME

  kbkb_to_kb

FUNCTION

  Computes the real-space product psi_out(r) = psi_in_1^*(r) * psi_in_2(r) of two wavefunction
  blocks given in k space, and returns the result in k space (linear algebra configuration).

INPUTS

OUTPUT

SOURCE

1288 subroutine kbkb_to_kb(psik_out,psik_in_1,psik_in_2)
1289 !----------------------------------------------------------------------------------------------------
1290 ! This function computes the direct product of two wavefunctions in real space,
1291 !         psi_out(r) = psi_in_1^*(r)*psi_in_2(r)  (the first input is complex conjugated in real space)
1292 ! and returns the result in Fourier space, psi_out(k).
1293 !
1294 ! The two input wavefunctions are in k space.
1295 !
1296 !
1297 !----------------------------------------------------------------------------------------------------
1298 real(dp), intent(out) :: psik_out(2,npw_kb)
1299 real(dp), intent(inout)  :: psik_in_1(2,npw_kb), psik_in_2(2,npw_kb)
1300 
1301 ! *************************************************************************
1302 
1303 ! change configuration of the data : LA -> FFT
1304 call wf_block_distribute(psik_in_1, psig1,1)
1305 call wf_block_distribute(psik_in_2, psig2,1)
1306 
1307 ! Fourier transform the first input
1308 call g_to_r(psir1, psig1)
1309 
1310 ! Complex conjugate in real space the first input
1311 psir1(2,:,:,:) = -psir1(2,:,:,:)
1312 
1313 ! Fourier transform the second input and multiply component-wise
1314 call gr_to_g(psig2,psir1)
1315 
1316 ! change configuration of the output : FFT -> LA
1317 call wf_block_distribute(psik_out, psig2,2)
1318 
1319 end subroutine kbkb_to_kb

m_gwls_hamiltonian/pc_k_valence_kernel [ Functions ]


NAME

  pc_k_valence_kernel

FUNCTION

  Projects a wavefunction (in the FFT configuration) with respect to the valence kernel: if n is
  present, returns the projection on the first n kernel states; otherwise, projects out all the
  valence states (conduction projector).

INPUTS

OUTPUT

SOURCE

353 subroutine pc_k_valence_kernel(psi_inout,n)
354 
355 !================================================================================
356 ! This routine projects out of the valence kernel. It assumes the input/output
357 ! array is distributed in the FFT configuration, and that the global
358 ! array containing the kernel (defined in this module) is already prepared
359 ! and ready to be used.
360 !================================================================================
361 
362 real(dp), intent(inout) :: psi_inout(2,npw_g)
363 integer , intent(in), optional :: n
364 
365 
366 real(dp), allocatable :: psi_projected(:,:)
367 
368 real(dp) :: tmpc(2)
369 
370 integer :: v, n_max
371 
372 integer :: mpi_communicator, ierr
373 
374 ! *************************************************************************
375 
376 
377 ABI_MALLOC( psi_projected, (2,npw_g))
378 
379 mpi_communicator =  mpi_enreg%comm_fft
380 
381 if (present(n)) then
382   n_max = n
383 else
384   n_max = nbandv
385 end if
386 
387 !====================================================
388 ! Compute projection
389 !====================================================
390 
391 psi_projected(:,:) = zero
392 
393 do v = 1, n_max
394 
395 ! compute overlap of kernel member with function
396 tmpc  = cg_zdotc(npw_g ,kernel_wavefunctions_FFT(:,:,v),psi_inout(:,:))
397 
398 ! Communicate results
399 call xmpi_sum(tmpc, mpi_communicator, ierr) ! sum on all processors working on FFT!
400 
401 ! add overlap with array to project
402 psi_projected(1,:) = psi_projected(1,:) + (tmpc(1)*kernel_wavefunctions_FFT(1,:,v)-tmpc(2)*kernel_wavefunctions_FFT(2,:,v))
403 psi_projected(2,:) = psi_projected(2,:) + (tmpc(1)*kernel_wavefunctions_FFT(2,:,v)+tmpc(2)*kernel_wavefunctions_FFT(1,:,v))
404 
405 !psi_inout(1,:) = psi_inout(1,:) -( tmpc(1)*kernel_wavefunctions_FFT(1,:,v)-tmpc(2)*kernel_wavefunctions_FFT(2,:,v) )
406 !psi_inout(2,:) = psi_inout(2,:) -( tmpc(1)*kernel_wavefunctions_FFT(2,:,v)+tmpc(2)*kernel_wavefunctions_FFT(1,:,v) )
407 
408 
409 end do
410 
411 if (present(n)) then
412   psi_inout(:,:) = psi_projected(:,:)
413 else
414   psi_inout(:,:) = psi_inout(:,:) - psi_projected(:,:)
415 end if
416 
417 
418 
419 ABI_FREE( psi_projected)
420 
421 end subroutine pc_k_valence_kernel
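
The two branches implement complementary projectors: with n present, the routine returns
P_n |psi> = sum_{v<=n} |v><v|psi>; without n, it returns the conduction projection (1 - P_v)|psi>.
A hedged sketch of the two modes (psi in the FFT distribution):

  call pc_k_valence_kernel(psi)           ! psi <- (1 - P_v) |psi>   (project out valence states)
  call pc_k_valence_kernel(psi, nbandv)   ! psi <- P_v |psi>         (keep the valence projection)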

m_gwls_hamiltonian/precondition [ Functions ]


NAME

  precondition

FUNCTION

  Applies the diagonal preconditioner pcon to a wavefunction stored as a real (2,npw_g) array.

INPUTS

OUTPUT

SOURCE

916 subroutine precondition(psi_out,psi_in)
917 
918 real(dp), intent(out) :: psi_out(2,npw_g)
919 real(dp), intent(in)  :: psi_in(2,npw_g)
920 
921 ! *************************************************************************
922 
923 do i=1,npw_g
924 psi_out(:,i) = psi_in(:,i)*pcon(i)
925 end do
926 
927 end subroutine precondition

m_gwls_hamiltonian/precondition_cplx [ Functions ]


NAME

  precondition_cplx

FUNCTION

  Applies the diagonal preconditioner pcon to a wavefunction stored as a complex array.

INPUTS

OUTPUT

SOURCE

943 subroutine precondition_cplx(psi_out,psi_in)
944 
945 complex(dpc), intent(out) :: psi_out(npw_g)
946 complex(dpc), intent(in)  :: psi_in(npw_g)
947 
948 ! *************************************************************************
949 
950 do i=1,npw_g
951 psi_out(i) = psi_in(i)*pcon(i)
952 end do
953 end subroutine precondition_cplx

m_gwls_hamiltonian/set_precondition [ Functions ]


NAME

  set_precondition

FUNCTION

  Builds the kinetic-energy based diagonal preconditioner pcon used by SQMR, for the operator
  A = H - lambda_0 (no arguments) or A = (H - lambda)^2 + omega^2 (both arguments present).

INPUTS

OUTPUT

SOURCE

784 subroutine set_precondition(lambda,omega)
785 !--------------------------------------------------------------------------------
786 ! This subroutine preconditions the problem
787 !
788 !                                 A x = b,
789 !
790 ! with:
791 !
792 !        omega         lambda              Operator
793 !        ------------------------------------------------------
794 !     absent        absent         A =   (H - lambda_0)  (value of lambda_0 not important)
795 !     present       present        A =   (H - lambda)^2 + omega^2
796 !                         other cases not implemented
797 !
798 !
799 ! In the above, b = psik.
800 !
801 !--------------------------------------------------------------------------------
802 
803 ! TODO :
804 ! - eliminate the 2 "if(kinpw(i) < huge(zero)*1.0d-11)"
805 !   since ecutsm = 0.0 always (check if that's true in this gw_sternheimer subroutine).
806 
807 real(dp), intent(in), optional :: lambda, omega
808 
809 real(dp) :: poly, x
810 logical :: omega_imaginary
811 
812 
813 !integer,save :: counter = 0
814 !integer      :: io_unit
815 !character(18):: filename
816 !logical      :: file_exists
817 
818 ! *************************************************************************
819 
820 
821 if (present(lambda) .and. present(omega))  then
822   omega_imaginary        =.true.
823 else
824   omega_imaginary        =.false.
825 end if
826 
827 !io_unit  = get_unit()
828 !filename = "PRECONDITIONER.log"
829 !inquire(file=filename,exist=file_exists)
830 
831 !if (file_exists) then
832 !      open(io_unit,file=filename,position='append',status=files_status_old)
833 !else
834 !      open(io_unit,file=filename,status=files_status_new)
835 !        write(io_unit,10) "#======================================================================================="
836 !        write(io_unit,10) "#                                                                                       "
837 !        write(io_unit,10) "#   This file contains information regarding the preconditioning scheme for SQMR.       "
838 !        write(io_unit,10) "#                                                                                       "
839 !        write(io_unit,10) "#======================================================================================="
840 !end if
841 
842 !counter = counter + 1
843 !write(io_unit,10) "#                                                                                       "
844 !write(io_unit,15) "#   Call #:", counter
845 !if (present(lambda) .and. present(omega))  then
846 !        write(io_unit,10) "#    lambda and omega are present: imaginary frequency case"
847 !else
848 !        write(io_unit,10) "#    lambda and omega are absent: omega = 0 case"
849 !end if
850 
851 !do i=1,npw_k
852 
853 do i = 1, npw_g
854 
855 if(omega_imaginary) then
856   x = (kinpw_gather(i)-lambda)**2 + omega**2
857 else
858   x = kinpw_gather(i)
859 end if
860 
861 if(x < huge(zero)*1.0d-11) then
862   poly    = 27.0 + x*(18.0 + x*(12.0 + 8.0*x))
863   pcon(i) = poly/(poly + 16.0*(x**4))
864   !pcon(i) = 1.0/(1.0+x) !I don't know why, it gives better results for Silane than the above polynomial.
865 else
866   pcon(i) = zero
867 end if
868 end do
869 
870 !write(io_unit,30) "                prec(       1:   10) =  ",pcon(1:10)
871 !write(io_unit,30) "                prec(npw_k-10:npw_k) =  ",pcon(npw_k-10:npw_k)
872 
873 !close(io_unit)
874 
875 !10 format(A)
876 !15 format(A,I8)
877 !30 format(A,1000(ES24.12))
878 
879 end subroutine set_precondition
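
The polynomial above is a Teter-Payne-Allan-like kinetic-energy preconditioner,
pcon = poly(x)/(poly(x) + 16 x^4) with poly(x) = 27 + 18x + 12x^2 + 8x^3 and x the (shifted)
kinetic energy; it tends to 1 for small x and decays as 1/x^4 for large x. A hedged sketch of
how the pair of routines brackets a preconditioned solve:

  call set_precondition(lambda, omega)   ! build pcon for A = (H - lambda)^2 + omega^2
  ! ... iterative solver (SQMR) applies precondition(psi_out, psi_in) to each residual ...
  call unset_precondition()              ! reset pcon to the identity afterwards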

m_gwls_hamiltonian/sqrt_vc_k [ Functions ]


NAME

  sqrt_vc_k

FUNCTION

  Multiplies a wavefunction, in reciprocal space, by the square root of the Coulomb potential
  (vc_sqrt).

INPUTS

OUTPUT

SOURCE

969 subroutine sqrt_vc_k(psi_inout)
970 
971 !External variables
972 real(dp), intent(inout) :: psi_inout(2,npw_k)
973 
974 !Internal variable
975 complex(dpc) :: c
976 
977 ! *************************************************************************
978 
979 do i=1,npw_k
980 c = vc_sqrt(i) * cmplx(psi_inout(1,i),psi_inout(2,i),dpc)
981 psi_inout(1,i) = dble (c)
982 psi_inout(2,i) = dimag(c)
983 end do
984 end subroutine sqrt_vc_k

m_gwls_hamiltonian/unset_precondition [ Functions ]


NAME

  unset_precondition

FUNCTION

  Resets the preconditioner pcon to the identity.

INPUTS

OUTPUT

SOURCE

895 subroutine unset_precondition()
896 
897 ! *************************************************************************
898 
899 pcon = one
900 end subroutine unset_precondition

m_gwls_hamiltonian/wf_block_distribute [ Functions ]


NAME

  wf_block_distribute

FUNCTION

  Transfers a block of wavefunctions between the "linear algebra" and "FFT" MPI data
  distributions (direction 1: LA -> FFT; direction 2: FFT -> LA).

INPUTS

OUTPUT

SOURCE

437 subroutine wf_block_distribute(psik, psik_alltoall, direction)
438 
439 !================================================================================
440 !
441 ! This subroutine distributes a block of wavefunctions in the "linear algebra"
442 ! configuration, to a configuration appropriate to perform FFT (or apply Hamiltonian).
443 !
444 ! The code below is inspired by the subroutine prep_getghc, which rearranges
445 ! data over MPI processors prior to applying the Hamiltonian.
446 !
447 ! input:
448 !             npw_kb :   dimension of wavefunctions in the "linear algebra" configuration,
449 !                               which means the G-vectors are distributed over all
450 !                               processors, and every processor has information about all the bands.
451 !
452 !
453 !             npw_g       :   dimension of wavefunctions in the "FFT" configuration,
454 !                               which means the G-vectors are distributed along rows of the
455 !                               MPI topology, and a given row only has one band.
456 !
457 !              direction    :   Are we going from LA to FFT, or from FFT to LA?
458 !                              1 : LA  -> FFT
459 !                              2 : LA <-  FFT
460 !
461 ! input/output:
462 !              psik          : block of wavefunctions, in LA configuration
463 !              psik_alltoall : wavefunction, in FFT configuration
464 !================================================================================
465 
466 integer , intent(in)    :: direction                   ! flag which determines the direction of the transfer
467 real(dp), intent(inout) :: psik(2,npw_kb)       ! block of wavefunctions, in "linear algebra" configuration
468 real(dp), intent(inout) :: psik_alltoall(2,npw_g)    ! wavefunction in "FFT" configuration; a single band, but more G-vectors
469 
470 
471 integer  :: ier, spaceComm
472 
473 integer,allocatable :: rdisplsloc(:)
474 integer,allocatable :: recvcountsloc(:)
475 integer,allocatable :: sdisplsloc(:)
476 integer,allocatable :: sendcountsloc(:)
477 
478 integer :: nproc_band,  bandpp
479 
480 integer,pointer :: rdispls(:)
481 integer,pointer :: recvcounts(:)
482 integer,pointer :: sdispls(:)
483 integer,pointer :: sendcounts(:)
484 
485 ! *************************************************************************
486 
487 
488 ! extract information in order to perform MPI communication.
489 ! This code comes from prep_getghc
490 nproc_band = mpi_enreg%nproc_band
491 bandpp     = mpi_enreg%bandpp
492 
493 if(mpi_enreg%nproc_band*mpi_enreg%bandpp > 1) then
494 
495   spaceComm=mpi_enreg%comm_fft
496   if(mpi_enreg%paral_kgb==1) spaceComm=mpi_enreg%comm_band
497 
498   ABI_MALLOC(sendcountsloc,(nproc_band))
499   ABI_MALLOC(sdisplsloc   ,(nproc_band))
500   ABI_MALLOC(recvcountsloc,(nproc_band))
501   ABI_MALLOC(rdisplsloc   ,(nproc_band))
502 
503   recvcounts   =>bandfft_kpt(ikpt_this_proc)%recvcounts(:)
504   sendcounts   =>bandfft_kpt(ikpt_this_proc)%sendcounts(:)
505   rdispls      =>bandfft_kpt(ikpt_this_proc)%rdispls   (:)
506   sdispls      =>bandfft_kpt(ikpt_this_proc)%sdispls   (:)
507 
508   recvcountsloc(:)= recvcounts(:)*2*nspinor*bandpp
509   rdisplsloc(:)   = rdispls(:)*2*nspinor*bandpp
510   sendcountsloc(:)= sendcounts(:)*2*nspinor
511   sdisplsloc(:)   = sdispls(:)*2*nspinor
512 
513   ! use MPI to communicate information!
514   if (direction == 1) then
515     ! LA -> FFT
516     call xmpi_alltoallv(psik, sendcountsloc, sdisplsloc, psik_alltoall, recvcountsloc,rdisplsloc, spaceComm, ier)
517 
518   else if (direction == 2) then
519     ! FFT -> LA
520 
521     call xmpi_alltoallv(psik_alltoall,recvcountsloc,rdisplsloc, psik, sendcountsloc,sdisplsloc,spaceComm,ier)
522 
523   end if
524 
525   ABI_FREE(sendcountsloc)
526   ABI_FREE(sdisplsloc   )
527   ABI_FREE(recvcountsloc)
528   ABI_FREE(rdisplsloc   )
529 
530 else
531 
532   if(direction == 1) psik_alltoall = psik
533 
534   if(direction == 2) psik = psik_alltoall
535 
536 end if
537 
538 end subroutine wf_block_distribute
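
A hedged round-trip example: since directions 1 and 2 use the same alltoall counts and
displacements, a block sent to the FFT layout and brought straight back is unchanged:

  call wf_block_distribute(psik, psik_alltoall, 1)  ! LA -> FFT: one band per row, all its G-vectors
  ! ... operate on psik_alltoall, e.g. call Hpsik(psik_alltoall) ...
  call wf_block_distribute(psik, psik_alltoall, 2)  ! FFT -> LA: gather bands back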