TABLE OF CONTENTS
ABINIT/fresid [ Functions ]
NAME
fresid
FUNCTION
If option=1, compute the forces due to the residual of the potential If option=2, generate approximate new density from old one, old atomic positions, and new atomic positions
COPYRIGHT
Copyright (C) 1998-2018 ABINIT group (XG, MM, MT) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
INPUTS
dtset <type(dataset_type)>=all input variables in this dataset | icoulomb=0 periodic treatment of Hartree potential, 1 use of Poisson solver | natom=number of atoms in cell. | nspden=number of spin-density components | typat(natom)=integer type for each atom in cell | usepaw= 0 for non paw calculation; =1 for paw calculation | xclevel= level of the XC functional mpi_enreg=information about MPI parallelization nfft=(effective) number of FFT grid points (for this processor) ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft ntypat=number of types of atoms in cell. option=see below pawtab(ntypat*dtset%usepaw) <type(pawtab_type)>=paw tabulated starting data rhor(nfft,nspden)=electron density in electrons/bohr**3 (slices of it if FTT parallelism). rprimd(3,3)=dimensional primitive translation vectors (bohr) ucvol=unit cell volume (bohr**3). xred_new(3,natom)=new reduced coordinates for atoms in unit cell xred_old(3,natom)=old reduced coordinates for atoms in unit cell znucl(ntypat)=real(dp), atomic number of atom type
OUTPUT
gresid(3,natom)=forces due to the residual of the potential
SIDE EFFECTS
work(nfft,nspden)=functions on the fft grid (slices of it if FTT parallelism): if option==1, the POTENTIAL residual is input if option==2, the interpolated density is output
NOTES
FFT parallelism: At the beginning of this routine, the plane-waves are ditributed over all the processors. In the main part, all the processors perform the same calculations over the whole FFT grid. At the end, each processor gets its part of the whole FFT grid. These modifications are not efficient when large FFT grids are used. So they have to be considered as a first step before a comprehensive parallelization of this routine.
PARENTS
forces,prcref,prcref_PMA,scfcv
CHILDREN
atomdata_from_znucl,mean_fftr,pre_gather,pre_scatter
SOURCE
64 #if defined HAVE_CONFIG_H 65 #include "config.h" 66 #endif 67 68 #include "abi_common.h" 69 70 subroutine fresid(dtset,gresid,mpi_enreg,nfft,ngfft,ntypat,option,& 71 & pawtab,rhor,rprimd,ucvol,work,xred_new,xred_old,znucl) 72 73 use defs_basis 74 use defs_abitypes 75 use m_profiling_abi 76 use m_cgtools 77 use m_atomdata 78 79 use m_pawtab, only : pawtab_type 80 use m_mpinfo, only : pre_gather, pre_scatter 81 82 !This section has been created automatically by the script Abilint (TD). 83 !Do not modify the following lines by hand. 84 #undef ABI_FUNC 85 #define ABI_FUNC 'fresid' 86 !End of the abilint section 87 88 implicit none 89 90 !Arguments ------------------------------------ 91 !scalars 92 integer,intent(in) :: nfft,ntypat,option 93 real(dp),intent(in) :: ucvol 94 type(MPI_type),intent(in) :: mpi_enreg 95 type(dataset_type),intent(in) :: dtset 96 !arrays 97 integer,intent(in) :: ngfft(18) 98 real(dp),intent(in) :: rhor(nfft,dtset%nspden),rprimd(3,3) 99 real(dp),intent(in) :: xred_new(3,dtset%natom),xred_old(3,dtset%natom) 100 real(dp),intent(in) :: znucl(ntypat) 101 real(dp),intent(inout) :: work(nfft,dtset%nspden) 102 real(dp),intent(out) :: gresid(3,dtset%natom) 103 type(pawtab_type),intent(in) :: pawtab(ntypat*dtset%usepaw) 104 105 !Local variables------------------------------- 106 !real(dp), parameter :: app_remain=0.001_dp 107 !scalars 108 integer,parameter :: natnum=110 109 integer :: atmove,i1,i1_new,i1m,i1p,i2,i2_new,i2m,i2p,i3,i3_new,i3m,i3p 110 integer :: iatom,ifft,ifft_new,iloop,ind2m,ind2m3m,ind2p,ind2p3p,ind3m,ind3p 111 integer :: index,index_new,ishift,ishift1,ishift2,ishift3,ispden,ixp,mshift,mu 112 integer :: n1,n2,n3,n4,nfft_tmp,nfftot,nu,quit 113 real(dp),parameter :: app_remain=0.01_dp 114 real(dp) :: diff_rem1,diff_rem2,diff_rem3,difmag,difmag2 115 real(dp) :: difmag2_fact,difmag2_part,drho1,drho11,drho12,drho13,drho14 116 real(dp) :: drho1dn,drho1mx,drho1my,drho1mz,drho1tot,drho1up,drho2,drho21 117 real(dp) :: drho22,drho23,drho24,drho2dn,drho2mx,drho2my,drho2mz,drho2tot 118 real(dp) :: drho2up,drho3,drho31,drho32,drho33,drho34,drho3dn,drho3mx,drho3my 119 real(dp) :: drho3mz,drho3tot,drho3up,drhox00,drhox01,drhox10,drhox11,drhoxy0 120 real(dp) :: drhoxy1,drhoxyz,fact,range,range2,rcov,rcov2,rcovm1,rdiff1 121 real(dp) :: rdiff2,rdiff3,vresid1,vresid2,vresid3,vresid4,xx 122 type(atomdata_t) :: atom 123 !arrays 124 integer :: diff_igrid(3),igrid(3),irange(3) 125 integer,allocatable :: ii(:,:) 126 real(dp) :: diff_grid(3),diff_rem(3),diff_tau(3),diff_xred(3),lencp(3) 127 real(dp) :: rho_tot(4),rhosum(4),rmet(3,3),scale(3),tau(3) 128 real(dp),allocatable :: approp(:),atmrho(:,:),rhor_tot(:,:),rrdiff(:,:) 129 real(dp),allocatable :: work_tot(:,:) 130 logical,allocatable :: my_sphere(:) 131 132 ! ************************************************************************* 133 134 !Compute lengths of cross products for pairs of primitive 135 !translation vectors (used in setting index search range below) 136 lencp(1)=cross_fr(rprimd(1,2),rprimd(2,2),rprimd(3,2),& 137 & rprimd(1,3),rprimd(2,3),rprimd(3,3)) 138 lencp(2)=cross_fr(rprimd(1,3),rprimd(2,3),rprimd(3,3),& 139 & rprimd(1,1),rprimd(2,1),rprimd(3,1)) 140 lencp(3)=cross_fr(rprimd(1,1),rprimd(2,1),rprimd(3,1),& 141 & rprimd(1,2),rprimd(2,2),rprimd(3,2)) 142 143 !Compute factor R1.(R2xR3)/|R2xR3| etc for 1, 2, 3 144 !(recall ucvol=R1.(R2xR3)) 145 scale(:)=ucvol/lencp(:) 146 147 !initialize diff_igrid, otherwise valgrind complains 148 diff_igrid=0 149 150 !Compute metric tensor in real space rmet 151 do nu=1,3 152 rmet(:,nu)=rprimd(1,:)*rprimd(1,nu)+rprimd(2,:)*rprimd(2,nu)+rprimd(3,:)*rprimd(3,nu) 153 end do 154 155 !FFT parallelization: Starting from now, calculations are performed on the whole FFT grid 156 !and no more on slices. The nfft variable becomes nfft_tmp until the end 157 n1=ngfft(1);n2=ngfft(2);n3=ngfft(3) 158 n4=n3/mpi_enreg%nproc_fft 159 nfftot=PRODUCT(ngfft(1:3));nfft_tmp=nfftot 160 if(mpi_enreg%paral_kgb==1) then 161 ABI_ALLOCATE(rhor_tot,(nfftot,dtset%nspden)) 162 ABI_ALLOCATE(work_tot,(nfftot,dtset%nspden)) 163 do ispden=1,dtset%nspden 164 call pre_gather(rhor(:,ispden),rhor_tot(:,ispden),n1,n2,n3,n4,mpi_enreg) 165 call pre_gather(work(:,ispden),work_tot(:,ispden),n1,n2,n3,n4,mpi_enreg) 166 end do 167 end if 168 169 gresid(1:3,1:dtset%natom)=0.0_dp 170 quit=0 171 172 !Initialize appropriation function 173 ABI_ALLOCATE(approp,(nfft_tmp)) 174 ABI_ALLOCATE(atmrho,(nfft_tmp,dtset%nspden)) 175 ABI_ALLOCATE(my_sphere,(nfft_tmp)) 176 177 approp(:)=app_remain 178 !First loop over atoms in unit cell : build appropriation function 179 !Second loop : compute forces 180 do iloop=1,2 181 182 ! Take into account the remaining density 183 if(option==2 .and. iloop==2)then 184 if(mpi_enreg%paral_kgb==1) then 185 ! FFT parallelization: All the processors perform the same calculation. 186 ! We divided by nproc_fft in order to "remove" the xmpi_sum made in mean_fftr 187 do ispden=1,dtset%nspden 188 do ifft=1,nfft_tmp 189 work_tot(ifft,ispden)=rhor_tot(ifft,ispden)*approp(ifft)*app_remain 190 end do 191 end do 192 call mean_fftr(work_tot,rhosum,nfft_tmp,nfftot,dtset%nspden,mpi_comm_sphgrid=mpi_enreg%comm_fft) 193 rhosum(1:dtset%nspden)=rhosum(1:dtset%nspden)/mpi_enreg%nproc_fft 194 else 195 do ispden=1,dtset%nspden 196 do ifft=1,nfft_tmp 197 work(ifft,ispden)=rhor(ifft,ispden)*approp(ifft)*app_remain 198 end do 199 end do 200 call mean_fftr(work,rhosum,nfft_tmp,nfftot,dtset%nspden,mpi_comm_sphgrid=mpi_enreg%comm_fft) 201 end if 202 203 ! This will be used to restore proper normalization of density 204 rho_tot(1:dtset%nspden)=rhosum(1:dtset%nspden)*nfftot 205 end if 206 207 do iatom=1,dtset%natom 208 209 ! Get the covalent radius 210 call atomdata_from_znucl(atom,znucl(dtset%typat(iatom))) 211 rcov = atom%rcov 212 ! PAW choose PAW radius instead... 213 if (dtset%usepaw==1) rcov=max(rcov,pawtab(dtset%typat(iatom))%rpaw) 214 215 ! Set search range 216 rcov2=rcov**2 217 range=2._dp*rcov 218 range2=range**2 219 rcovm1=1.0_dp/rcov 220 221 ! Use range to compute an index range along R(1:3) 222 ! (add 1 to make sure it covers full range) 223 irange(1)=1+nint((range/scale(1))*dble(n1)) 224 irange(2)=1+nint((range/scale(2))*dble(n2)) 225 irange(3)=1+nint((range/scale(3))*dble(n3)) 226 227 ! Allocate ii and rrdiff 228 mshift=2*maxval(irange(1:3))+1 229 ABI_ALLOCATE(ii,(mshift,3)) 230 ABI_ALLOCATE(rrdiff,(mshift,3)) 231 232 ! Consider each component in turn 233 do mu=1,3 234 235 ! Convert reduced coord of given atom to [0,1) 236 tau(mu)=mod(xred_old(mu,iatom)+1._dp-aint(xred_old(mu,iatom)),1._dp) 237 238 ! Use tau to find nearest grid point along R(mu) 239 ! (igrid=0 is the origin; shift by 1 to agree with usual index) 240 igrid(mu)=nint(tau(mu)*dble(ngfft(mu))) 241 242 ! Set up a counter that explore the relevant range 243 ! of points around the atom 244 ishift=0 245 do ixp=igrid(mu)-irange(mu),igrid(mu)+irange(mu) 246 ishift=ishift+1 247 ii(ishift,mu)=1+mod(ngfft(mu)+mod(ixp,ngfft(mu)),ngfft(mu)) 248 rrdiff(ishift,mu)=dble(ixp)/dble(ngfft(mu))-tau(mu) 249 end do 250 251 ! If option 2, set up quantities related with the change of atomic coordinates 252 if(option==2 .and. iloop==2)then 253 diff_xred(mu)=xred_new(mu,iatom)-xred_old(mu,iatom) 254 ! Convert to [0,1) 255 diff_tau(mu)=mod(diff_xred(mu)+1._dp-aint(diff_xred(mu)),1._dp) 256 ! Convert to [0,ngfft) 257 diff_grid(mu)=diff_tau(mu)*dble(ngfft(mu)) 258 ! Integer part 259 diff_igrid(mu)=int(diff_grid(mu)) 260 ! Compute remainder 261 diff_rem(mu)=diff_grid(mu)-diff_igrid(mu) 262 263 ! DEBUG 264 ! write(std_out,*)' mu,diff',mu,diff_igrid(mu),diff_rem(mu) 265 ! ENDDEBUG 266 267 end if 268 269 ! End loop on mu 270 end do 271 272 ! May be the atom is fixed 273 atmove=1 274 if(option==2 .and. iloop==2)then 275 if(diff_xred(1)**2+diff_xred(2)**2+diff_xred(3)**2 < 1.0d-24)then 276 atmove=0 277 else 278 diff_rem1=diff_rem(1) 279 diff_rem2=diff_rem(2) 280 diff_rem3=diff_rem(3) 281 end if 282 end if 283 284 ! If second loop, initialize atomic density, and the variable 285 ! that says whether a fft point belongs to the sphere of the atom 286 if(iloop==2) then 287 atmrho(:,:)=0.0_dp 288 my_sphere(:)=.false. 289 end if 290 291 ! Conduct triple loop over restricted range of grid points for iatom 292 293 do ishift3=1,1+2*irange(3) 294 ! map back to [1,ngfft(3)] for usual fortran index in unit cell 295 i3=ii(ishift3,3) 296 i3m=i3-1 ; if(i3==1)i3m=n3 297 i3p=i3+1 ; if(i3==n3)i3p=1 298 299 ! find vector from atom location to grid point (reduced) 300 rdiff3=rrdiff(ishift3,3) 301 302 do ishift2=1,1+2*irange(2) 303 i2=ii(ishift2,2) 304 i2m=i2-1 ; if(i2==1)i2m=n2 305 i2p=i2+1 ; if(i2==n2)i2p=1 306 index=n1*(i2-1+n2*(i3-1)) 307 ind3m=n1*(i2-1+n2*(i3m-1)) 308 ind3p=n1*(i2-1+n2*(i3p-1)) 309 ind2m=n1*(i2m-1+n2*(i3-1)) 310 ind2p=n1*(i2p-1+n2*(i3-1)) 311 ind2p3p=n1*(i2p-1+n2*(i3p-1)) 312 313 rdiff2=rrdiff(ishift2,2) 314 ! Prepare the computation of difmag2 315 difmag2_part=rmet(3,3)*rdiff3**2+rmet(2,2)*rdiff2**2& 316 & +2.0_dp*rmet(3,2)*rdiff3*rdiff2 317 difmag2_fact=2.0_dp*(rmet(3,1)*rdiff3+rmet(2,1)*rdiff2) 318 319 do ishift1=1,1+2*irange(1) 320 rdiff1=rrdiff(ishift1,1) 321 322 ! Compute (rgrid-tau-Rprim)**2 323 difmag2= difmag2_part+rdiff1*(difmag2_fact+rmet(1,1)*rdiff1) 324 325 ! Only accept contribution inside defined range 326 ! This condition means that x, calculated below, cannot exceed 2.0_dp 327 if (difmag2<range2) then 328 329 ! Will compute contribution to appropriation function based on 330 ! rcov2, range2 and difmag2 331 i1=ii(ishift1,1) 332 ifft=i1+index 333 334 if(iloop==1)then 335 336 ! Build appropriation function 337 if (difmag2<rcov2)then 338 approp(ifft)=approp(ifft)+1.0_dp 339 else 340 difmag=sqrt(difmag2) 341 xx=difmag*rcovm1 342 ! The following function is 1. at xx=1, 0. at xx=2, with vanishing 343 ! derivatives at these points. 344 approp(ifft)=approp(ifft)+((2.0_dp*xx-9.0_dp)*xx+12.0_dp)*xx-4.0_dp 345 end if 346 347 else 348 349 if (difmag2<rcov2) then 350 fact=one 351 else 352 difmag=sqrt(difmag2) 353 xx=difmag*rcovm1 354 fact=((2.0_dp*xx-9.0_dp)*xx+12.0_dp)*xx-4.0_dp 355 end if 356 357 ! Build atomic density 358 if(mpi_enreg%paral_kgb==1) then 359 atmrho(ifft,1:dtset%nspden)=atmrho(ifft,1:dtset%nspden) & 360 & +rhor_tot(ifft,1:dtset%nspden)*fact*approp(ifft) 361 else 362 atmrho(ifft,1:dtset%nspden)=atmrho(ifft,1:dtset%nspden) & 363 & +rhor(ifft,1:dtset%nspden)*fact*approp(ifft) 364 end if 365 366 ! Compute the sphere of the atom : it is different for 367 ! option 1 and for option 2 368 i1p=i1+1 ; if(i1==n1)i1p=1 369 if(option==1)then 370 i1m=i1-1 ; if(i1==1)i1m=n1 371 my_sphere(ifft)=.true. 372 my_sphere(i1p+index)=.true. ; my_sphere(i1m+index)=.true. 373 my_sphere(i1+ind2p)=.true. ; my_sphere(i1+ind2m)=.true. 374 my_sphere(i1+ind3p)=.true. ; my_sphere(i1+ind3m)=.true. 375 else 376 my_sphere(ifft)=.true. ; my_sphere(i1p+index)=.true. 377 my_sphere(i1+ind2p)=.true. ; my_sphere(i1p+ind2p)=.true. 378 my_sphere(i1+ind3p)=.true. ; my_sphere(i1p+ind3p)=.true. 379 my_sphere(i1+ind2p3p)=.true. ; my_sphere(i1p+ind2p3p)=.true. 380 end if 381 382 end if 383 384 ! End of condition on the range 385 end if 386 387 ! End loop on ishift1 388 end do 389 390 ! End loop on ishift2 391 end do 392 393 ! End loop on ishift3 394 end do 395 ! At the end of the second loop for each atom, compute the force 396 ! from the atomic densities, or translate density. 397 ! In the first case, use a two-point finite-difference approximation, 398 ! since this calculation serves only to decrease the error, 399 ! and should not be very accurate, but fast. 400 ! In the second case, using a crude trilinear interpolation scheme 401 ! for the same reason. 402 ! 403 ! The section is skipped if option==2 and the atom is fixed 404 if(iloop==2 .and. (option==1 .or. atmove==1) )then 405 406 do i3=1,n3 407 i3m=i3-1 ; if(i3==1)i3m=n3 408 i3p=i3+1 ; if(i3==n3)i3p=1 409 ! note: diff_igrid is only set if(option==2 .and. iloop==2) 410 i3_new=i3+diff_igrid(3) ; if(i3_new > n3)i3_new=i3_new-n3 411 do i2=1,n2 412 i2m=i2-1 ; if(i2==1)i2m=n2 413 i2p=i2+1 ; if(i2==n2)i2p=1 414 i2_new=i2+diff_igrid(2) ; if(i2_new > n2)i2_new=i2_new-n2 415 index=n1*(i2-1+n2*(i3-1)) 416 index_new=n1*(i2_new-1+n2*(i3_new-1)) 417 ind3m=n1*(i2-1+n2*(i3m-1)) 418 ind3p=n1*(i2-1+n2*(i3p-1)) 419 ind2m=n1*(i2m-1+n2*(i3-1)) 420 ind2p=n1*(i2p-1+n2*(i3-1)) 421 ind2m3m=n1*(i2m-1+n2*(i3m-1)) 422 do i1=1,n1 423 ifft=i1+index 424 if(my_sphere(ifft))then 425 426 i1m=i1-1 ; if(i1==1)i1m=n1 427 428 if(option==1)then 429 ! Treat option 1 : computation of residual forces 430 i1p=i1+1 ; if(i1==n1)i1p=1 431 ! Distinguish spin-unpolarized and spin-polarized 432 if(dtset%nspden==1)then ! Non magnetic 433 ! Note that the factor needed to obtain a true finite difference 434 ! estimation of the derivative will be applied afterwards, for speed 435 drho1=atmrho(i1p+index,1)-atmrho(i1m+index,1) 436 drho2=atmrho(i1+ind2p,1) -atmrho(i1+ind2m,1) 437 drho3=atmrho(i1+ind3p,1) -atmrho(i1+ind3m,1) 438 if(mpi_enreg%paral_kgb==1) then 439 vresid1=work_tot(ifft,1) 440 else 441 vresid1=work(ifft,1) 442 end if 443 gresid(1,iatom)=gresid(1,iatom)+drho1*vresid1 444 gresid(2,iatom)=gresid(2,iatom)+drho2*vresid1 445 gresid(3,iatom)=gresid(3,iatom)+drho3*vresid1 446 else if(dtset%nspden==2) then ! Collinear magnetism 447 drho1tot=atmrho(i1p+index,1)-atmrho(i1m+index,1) 448 drho2tot=atmrho(i1+ind2p,1) -atmrho(i1+ind2m,1) 449 drho3tot=atmrho(i1+ind3p,1) -atmrho(i1+ind3m,1) 450 drho1up=atmrho(i1p+index,2)-atmrho(i1m+index,2) 451 drho2up=atmrho(i1+ind2p,2) -atmrho(i1+ind2m,2) 452 drho3up=atmrho(i1+ind3p,2) -atmrho(i1+ind3m,2) 453 drho1dn=drho1tot-drho1up 454 drho2dn=drho2tot-drho2up 455 drho3dn=drho3tot-drho3up 456 if(mpi_enreg%paral_kgb==1) then 457 vresid1=work_tot(ifft,1) 458 vresid2=work_tot(ifft,2) 459 else 460 vresid1=work(ifft,1) 461 vresid2=work(ifft,2) 462 end if 463 gresid(1,iatom)=gresid(1,iatom)+drho1up*vresid1+drho1dn*vresid2 464 gresid(2,iatom)=gresid(2,iatom)+drho2up*vresid1+drho2dn*vresid2 465 gresid(3,iatom)=gresid(3,iatom)+drho3up*vresid1+drho3dn*vresid2 466 else ! Non-collinear magnetism 467 drho1tot=atmrho(i1p+index,1)-atmrho(i1m+index,1) 468 drho1mx =atmrho(i1p+index,2)-atmrho(i1m+index,2) 469 drho1my =atmrho(i1p+index,3)-atmrho(i1m+index,3) 470 drho1mz =atmrho(i1p+index,4)-atmrho(i1m+index,4) 471 drho2tot=atmrho(i1+ind2p,1) -atmrho(i1+ind2m,1) 472 drho2mx =atmrho(i1+ind2p,2) -atmrho(i1+ind2m,2) 473 drho2my =atmrho(i1+ind2p,3) -atmrho(i1+ind2m,3) 474 drho2mz =atmrho(i1+ind2p,4) -atmrho(i1+ind2m,4) 475 drho3tot=atmrho(i1+ind3p,1) -atmrho(i1+ind3m,1) 476 drho3mx =atmrho(i1+ind3p,2) -atmrho(i1+ind3m,2) 477 drho3my =atmrho(i1+ind3p,3) -atmrho(i1+ind3m,3) 478 drho3mz =atmrho(i1+ind3p,4) -atmrho(i1+ind3m,4) 479 drho11=half*(drho1tot+drho1mz) 480 drho12=half*(drho1tot-drho1mz) 481 drho13= half*drho1mx 482 drho14=-half*drho1my 483 drho21=half*(drho2tot+drho2mz) 484 drho22=half*(drho2tot-drho2mz) 485 drho23= half*drho2mx 486 drho24=-half*drho2my 487 drho31=half*(drho3tot+drho3mz) 488 drho32=half*(drho3tot-drho3mz) 489 drho33= half*drho3mx 490 drho34=-half*drho3my 491 if(mpi_enreg%paral_kgb==1) then 492 vresid1=work_tot(ifft,1) 493 vresid2=work_tot(ifft,2) 494 vresid3=work_tot(ifft,3) 495 vresid4=work_tot(ifft,4) 496 else 497 vresid1=work(ifft,1) 498 vresid2=work(ifft,2) 499 vresid3=work(ifft,3) 500 vresid4=work(ifft,4) 501 end if 502 gresid(1,iatom)=gresid(1,iatom)+drho11*vresid1+drho12*vresid2+two*(drho13*vresid3+drho14*vresid4) 503 gresid(2,iatom)=gresid(2,iatom)+drho21*vresid1+drho22*vresid2+two*(drho23*vresid3+drho24*vresid4) 504 gresid(3,iatom)=gresid(3,iatom)+drho31*vresid1+drho32*vresid2+two*(drho33*vresid3+drho34*vresid4) 505 end if 506 ! Treat the case option==2 now : trilinear interpolation of the density 507 else 508 i1_new=i1+diff_igrid(1) ; if(i1_new > n1)i1_new=i1_new-n1 509 ifft_new=i1_new+index_new 510 do ispden=1,dtset%nspden 511 drhox00=(atmrho(i1m+index,ispden)-atmrho(i1+index,ispden))*diff_rem1 & 512 & +atmrho(i1+index,ispden) 513 drhox10=(atmrho(i1m+ind2m,ispden)-atmrho(i1+ind2m,ispden))*diff_rem1 & 514 & +atmrho(i1+ind2m,ispden) 515 drhox01=(atmrho(i1m+ind3m,ispden)-atmrho(i1+ind3m,ispden))*diff_rem1 & 516 & +atmrho(i1+ind3m,ispden) 517 drhox11=(atmrho(i1m+ind2m3m,ispden)-atmrho(i1+ind2m3m,ispden))*diff_rem1 & 518 & +atmrho(i1+ind2m3m,ispden) 519 drhoxy0=(drhox10-drhox00)*diff_rem2+drhox00 520 drhoxy1=(drhox11-drhox01)*diff_rem2+drhox01 521 drhoxyz=(drhoxy1-drhoxy0)*diff_rem3+drhoxy0 522 if(mpi_enreg%paral_kgb==1) then 523 work_tot(ifft_new,ispden)=work_tot(ifft_new,ispden)+drhoxyz 524 else 525 work(ifft_new,ispden)=work(ifft_new,ispden)+drhoxyz 526 end if 527 rho_tot(ispden)=rho_tot(ispden)+drhoxyz 528 end do 529 end if 530 531 ! End condition of belonging to the sphere of influence of the atom 532 end if 533 end do 534 end do 535 end do 536 ! The finite-difference factor applied here also take 537 ! into account diverse factors 538 fact=-ucvol/dble(nfftot) 539 gresid(1,iatom)=gresid(1,iatom)*dble(n1)*.5_dp*fact 540 gresid(2,iatom)=gresid(2,iatom)*dble(n2)*.5_dp*fact 541 gresid(3,iatom)=gresid(3,iatom)*dble(n3)*.5_dp*fact 542 end if 543 544 ! Update work if the atom is fixed. 545 if(iloop==2 .and. option==2 .and. atmove==0)then 546 if(mpi_enreg%paral_kgb==1) then 547 ! FFT parallelization: All the processors perform the same calculation. 548 ! We divided by nproc_fft in order to "remove" the xmpi_sum made in mean_fftr 549 do ispden=1,dtset%nspden 550 do ifft=1,nfft_tmp 551 work_tot(ifft,ispden)=work_tot(ifft,ispden)+atmrho(ifft,ispden) 552 end do 553 end do 554 call mean_fftr(atmrho,rhosum,nfft_tmp,nfftot,dtset%nspden,mpi_comm_sphgrid=mpi_enreg%comm_fft) 555 rhosum(1:dtset%nspden)=rhosum(1:dtset%nspden)/mpi_enreg%nproc_fft 556 else 557 do ispden=1,dtset%nspden 558 do ifft=1,nfft_tmp 559 work(ifft,ispden)=work(ifft,ispden)+atmrho(ifft,ispden) 560 end do 561 end do 562 call mean_fftr(atmrho,rhosum,nfft_tmp,nfftot,dtset%nspden,mpi_comm_sphgrid=mpi_enreg%comm_fft) 563 end if 564 565 rho_tot(1:dtset%nspden)=rho_tot(1:dtset%nspden)+rhosum(1:dtset%nspden)*nfftot 566 end if 567 568 ABI_DEALLOCATE(ii) 569 ABI_DEALLOCATE(rrdiff) 570 571 ! End loop on atoms 572 end do 573 574 ! DEBUG 575 ! if(option==2)then 576 ! if(iloop==1)then 577 ! write(std_out,*)' fresid : rhor, approp' 578 ! do ifft=1,n1 579 ! write(std_out,*)ifft,rhor(ifft,1),approp(ifft) 580 ! end do 581 ! end if 582 ! if(iloop==2)then 583 ! write(std_out,*)' fresid : rhor, approp, work(:,:)' 584 ! do ifft=1,n1 585 ! write(std_out,'(i4,3es18.8)' )ifft,rhor(ifft,1),approp(ifft),work(ifft,1) 586 ! end do 587 ! do ifft=1,nfft_tmp 588 ! if(work(ifft,1)<0.0_dp)then 589 ! write(std_out,*)' f_fft negative value',work(ifft,1),' for ifft=',ifft 590 ! end if 591 ! if(rhor(ifft,1)<0.0_dp)then 592 ! write(std_out,*)' rhor negative value',rhor(ifft,1),' for ifft=',ifft 593 ! end if 594 ! end do 595 ! end if 596 ! end if 597 ! ENDDEBUG 598 599 if(quit==1)exit 600 601 ! At the end of the first loop, where the appropriation function is generated, 602 ! invert it, to save cpu time later. 603 if(iloop==1)approp(:)=1.0_dp/approp(:) 604 605 ! End first or second pass through atoms 606 end do 607 608 !Restore proper normalisation of density 609 !(Non-collinear magnetism: n, mx,my,mz integral conservation) 610 if(option==2)then 611 if(mpi_enreg%paral_kgb==1) then 612 ! FFT parallelization: All the processors perform the same calculation. 613 ! We divided by nproc_fft in order to "remove" the xmpi_sum made in mean_fftr 614 ! Trangel: mpicomm now is optional in mean_fftr, no need to divide over nproc_fft 615 call mean_fftr(rhor_tot,rhosum,nfft_tmp,nfftot,dtset%nspden) 616 ! rhosum(1:dtset%nspden)=rhosum(1:dtset%nspden)/mpi_enreg%nproc_fft 617 else 618 call mean_fftr(rhor,rhosum,nfft_tmp,nfftot,dtset%nspden,mpi_comm_sphgrid=mpi_enreg%comm_fft) 619 end if 620 ! "!OCL NOPREEX" to avoid zero division after optimization (-Of) by MM 621 ! (Even if nspden=1, "1.0/rho_tot" will appear on vpp fujitsu 622 ! OCL NOPREEX 623 if(mpi_enreg%paral_kgb==1) then 624 do ispden=1,dtset%nspden 625 fact=rhosum(ispden)*dble(nfftot)/rho_tot(ispden) 626 work_tot(:,ispden)=fact*work_tot(:,ispden) 627 call pre_scatter(work(:,ispden),work_tot(:,ispden),n1,n2,n3,n4,mpi_enreg) 628 end do 629 else 630 do ispden=1,dtset%nspden 631 fact=rhosum(ispden)*dble(nfftot)/rho_tot(ispden) 632 work(:,ispden)=fact*work(:,ispden) 633 end do 634 end if 635 ! DEBUG 636 ! Here, zero all the hard work, for checking purposes ! 637 ! work(:,:)=rhor(:,:) 638 ! ENDDEBUG 639 end if 640 641 ABI_DEALLOCATE(approp) 642 ABI_DEALLOCATE(atmrho) 643 ABI_DEALLOCATE(my_sphere) 644 if(mpi_enreg%paral_kgb==1) then 645 ABI_DEALLOCATE(rhor_tot) 646 ABI_DEALLOCATE(work_tot) 647 end if 648 649 !DEBUG 650 !write(std_out,*)' fresid : exit ' 651 !do iatom=1,dtset%natom 652 !write(std_out,*)iatom,gresid(1:3,iatom) 653 !enddo 654 !ENDDEBUG 655 656 contains 657 658 function cross_fr(xx,yy,zz,aa,bb,cc) 659 !Define magnitude of cross product of two vectors 660 661 !This section has been created automatically by the script Abilint (TD). 662 !Do not modify the following lines by hand. 663 #undef ABI_FUNC 664 #define ABI_FUNC 'cross_fr' 665 !End of the abilint section 666 667 real(dp) :: cross_fr 668 real(dp),intent(in) :: xx,yy,zz,aa,bb,cc 669 cross_fr=sqrt((yy*cc-zz*bb)**2+(zz*aa-xx*cc)**2+(xx*bb-yy*aa)**2) 670 end function cross_fr 671 672 end subroutine fresid