TABLE OF CONTENTS
ABINIT/vtorhorec [ Functions ]
NAME
vtorhorec
FUNCTION
This routine computes the new density from a fixed potential (vtrial) using a recursion method
COPYRIGHT
Copyright (C) 2008-2018 ABINIT group (SLeroux,MMancini). This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
INPUTS
deltastep= if 0 the iteration step is equal to dtset%nstep initialized= if 0 the initialization of the gstate run is not yet finished operator (ground-state symmetries) dtset <type(dataset_type)>=all input variables for this dataset irrzon(nfft**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data nfftf=(effective) number of FFT grid points (for this processor) nspden=number of spin-density components nsppol=1 for unpolarized, 2 for spin-polarized nsym=number of symmetry elements in space group phnons(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases vtrial(nfft,nspden)=INPUT Vtrial(r). rset <type(recursion_type)> all variables for recursion rprimd(3,3)=dimensional primitive translations in real space (bohr) gprimd(3,3)=dimensional primitive translations in reciprocal space
OUTPUT
ek=kinetic energy part of total energy. enl=nonlocal pseudopotential part of total energy. entropy=entropy due to the occupation number smearing (if metal) e_eigenvalues=Sum of the eigenvalues - Band energy (Hartree) fermie=fermi energy (Hartree) grnl(3*natom)=stores grads of nonlocal energy wrt length scales (3x3 tensor) and grads wrt atomic coordinates (3*natom)
SIDE EFFECTS
rhog(2,nfft)=array for Fourier transform of electron density rhor(nfft,nspden)=array for electron density in electrons/bohr**3. rset%efermi= fermi energy
PARENTS
scfcv
CHILDREN
calcnrec,cudarec,destroy_distribfft,entropyrec,fermisolverec gran_potrec,init_distribfft,nlenergyrec,prtwork,recursion,reshape_pot symrhg,timab,transgrid,wrtout,xmpi_allgatherv,xmpi_barrier,xmpi_max xmpi_sum,xredistribute
NOTES
at this time : - grnl in not implemented - symetrie usage not implemented (irrzon not used and nsym should be 1) - spin-polarized not implemented (nsppol must be 1, nspden ?) - phnons used only in symrhg - need a rectangular box (ngfft(1)=ngfft(2)=ngfft(3))
SOURCE
66 #if defined HAVE_CONFIG_H 67 #include "config.h" 68 #endif 69 70 #include "abi_common.h" 71 72 subroutine vtorhorec(dtset,& 73 & ek,enl,entropy,e_eigenvalues,fermie,& 74 & grnl,initialized,irrzon,nfftf,phnons,& 75 & rhog, rhor, vtrial,rset,deltastep,rprimd,gprimd) 76 77 use defs_basis 78 use defs_abitypes 79 use defs_rectypes 80 use m_xmpi 81 use m_pretty_rec 82 use m_errors 83 use m_profiling_abi 84 85 use m_rec, only : Calcnrec 86 use m_rec_tools, only : reshape_pot 87 #ifdef HAVE_GPU_CUDA 88 use m_initcuda, only : cudap 89 use m_hidecudarec 90 use m_xredistribute 91 #endif 92 93 !This section has been created automatically by the script Abilint (TD). 94 !Do not modify the following lines by hand. 95 #undef ABI_FUNC 96 #define ABI_FUNC 'vtorhorec' 97 use interfaces_14_hidewrite 98 use interfaces_18_timing 99 use interfaces_65_paw 100 use interfaces_67_common 101 use interfaces_68_recursion, except_this_one => vtorhorec 102 !End of the abilint section 103 104 implicit none 105 106 !Arguments ------------------------------- 107 !scalars 108 integer,intent(in) :: initialized 109 integer,intent(in) :: nfftf,deltastep 110 real(dp),intent(out) :: e_eigenvalues,ek,enl,entropy,fermie 111 type(dataset_type),intent(in) :: dtset 112 type(recursion_type),intent(inout) :: rset 113 !arrays 114 integer, intent(in) :: irrzon(:,:,:) 115 real(dp),intent(in) :: rprimd(3,3),gprimd(3,3) 116 real(dp),intent(in) :: phnons(:,:,:) 117 real(dp),intent(in) :: vtrial(:,:) 118 real(dp),intent(inout) :: rhog(:,:) 119 real(dp),intent(out) :: grnl(:) !vz_i 120 real(dp),intent(inout) :: rhor(:,:) !vz_i 121 122 !Local variables------------------------------- 123 !scalars 124 integer :: nfftrec, dim_entro,ii1,jj1,kk1 125 integer :: ierr,ii,ilmn,ipsp,dim_trott 126 integer :: ipoint,ipointlocal,jj,kk,irec 127 integer :: n1,n2,n3,n4,n_pt_integ_entropy 128 integer :: nrec,iatom,min_pt,max_pt,swt_tm 129 integer :: get_K_S_G 130 integer :: tim_fourdp,trotter 131 real(dp),parameter :: perc_vmin=one 132 real(dp) :: beta,drho,drhomax 133 real(dp) :: entropy1,entropy2,entropy3,entropy4 134 real(dp) :: entropylocal,entropylocal1,entropylocal2 135 real(dp) :: entropylocal3,entropylocal4,gran_pot,gran_pot1,gran_pot2 136 real(dp) :: gran_pot3,gran_pot4,gran_pot_local,gran_pot_local1 137 real(dp) :: gran_pot_local2,gran_pot_local3,gran_pot_local4 138 real(dp) :: inf_ucvol,intrhov,factor 139 real(dp) :: nelect,potmin,rtrotter,toldrho,tolrec,tsmear 140 real(dp) :: xmax,nlpotmin,ratio1,ratio2,ratio4,ratio8 141 type(recparall_type) :: recpar 142 character(len=500) :: msg 143 !arrays 144 integer :: ngfftrec(18),trasl(3) 145 integer,pointer :: gcart_loc(:,:) 146 integer,allocatable :: bufsize(:), bufdispl(:) 147 real(dp) :: tsec(2),tsec2(2) 148 real(dp) :: exppot(0:dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3)-1) 149 real(dp),target :: rholocal(1:rset%par%ntranche) 150 real(dp),target :: alocal(0:rset%min_nrec,1:rset%par%ntranche) 151 real(dp),target :: b2local(0:rset%min_nrec,1:rset%par%ntranche) 152 real(dp),pointer :: rho_wrk(:) 153 real(dp),pointer :: a_wrk(:,:),b2_wrk(:,:) 154 real(dp),allocatable :: rholocal_f(:), rholoc_2(:) 155 real(dp),allocatable :: rhogf(:,:),rhogc(:,:),aloc_copy(:,:),b2loc_copy(:,:) 156 real(dp),allocatable :: gran_pot_v_f(:,:),gran_pot_v_c(:,:),gran_pot_v_2(:,:) 157 real(dp),allocatable :: entropy_v_f(:,:),entropy_v_c(:,:),entropy_v_2(:,:) 158 real(dp),allocatable :: ablocal_1(:,:,:),ablocal_2(:,:,:),ablocal_f(:,:,:) 159 real(dp),allocatable :: exppotloc(:) 160 real(dp),allocatable :: projec(:,:,:,:,:) 161 #if defined HAVE_GPU_CUDA 162 integer :: max_rec 163 integer,allocatable :: vcount_0(:), displs_0(:) 164 integer,allocatable :: vcount_1(:), displs_1(:) 165 real(dp),allocatable,target :: rho_hyb(:) 166 real(dp),allocatable,target :: a_hyb(:,:),b2_hyb(:,:) 167 real(cudap),allocatable :: an_dev(:,:) 168 real(cudap),allocatable :: bn2_dev(:,:) 169 #endif 170 171 ! ********************************************************************* 172 if(rset%debug)then 173 write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' vtorhorec : enter ' 174 call wrtout(std_out,msg,'PERS') 175 end if 176 177 call timab(21,1,tsec) 178 call timab(600,1,tsec2) 179 !################################################################################################## 180 !!--Initalization in the FIRST time in VTORHOREC is made in SCFCV by FIRST_REC routine 181 !!--Parameters for the recursion method AND Initialisation 182 183 trotter = dtset%recptrott !--trotter parameter 184 nelect = dtset%nelect !--number of electrons 185 186 toldrho = dtset%rectolden !--tollerance for density 187 tolrec = toldrho*1.d-2 !--tollerance for local density 188 189 tsmear = dtset%tsmear !--temperature 190 beta = one/tsmear !--inverse of temperature 191 192 factor = real(dtset%recgratio**3*100*rset%mpi%nproc,dp)/real(nfftf,dp) 193 194 !--Assignation of the rset variable: 195 nrec = rset%min_nrec 196 nfftrec = rset%nfftrec 197 ngfftrec = rset%ngfftrec 198 inf_ucvol = rset%inf%ucvol 199 200 min_pt = rset%par%displs(rset%mpi%me)+1 201 max_pt = min_pt+rset%par%vcount(rset%mpi%me)-1 202 203 !--In the last self-constistent loop or if density is converged the 204 !thermodynamics quantities are calculated 205 get_K_S_G = 0; if(deltastep==0 .or. rset%quitrec/=0) get_K_S_G = 1; 206 207 !--Rewriting the trotter parameter 208 rtrotter = max(half,real(trotter,dp)) 209 dim_trott = max(0,2*trotter-1) 210 211 !--Variables Optimisation 212 ratio1 = beta/rtrotter 213 ratio2 = ratio1/two 214 ratio4 = ratio1/four 215 ratio8 = ratio1/eight 216 217 !--Integration points for entropy 218 n_pt_integ_entropy = max(25,dtset%recnpath) 219 220 !-- energies non-local: at day not implemented 221 enl = zero 222 grnl = zero 223 !jmb 224 ek = zero 225 226 !--only a copy of ngfft(1:3) and nfft (to purge!!) 227 n1 = dtset%ngfft(1) ; n2 = dtset%ngfft(2) ; n3 = dtset%ngfft(3) 228 n4 = n3 229 230 !--time switch to measure gpu-cpu syncrhonisation 231 swt_tm = 0 ; !no gpu initally 232 233 exppot = zero 234 nullify(gcart_loc) 235 236 if(dtset%rectesteg==1)then 237 ! --Free electron gas case 238 exppot = one 239 else 240 if(.not.(rset%nl%nlpsp)) then 241 ! --Local case 242 ! --COMPUTATION OF exp( -beta*pot/(4*rtrotter)) 243 ABI_ALLOCATE(gcart_loc,(0,0)) 244 gcart_loc = 0 245 exppot = exp( -(ratio4*vtrial(:,1))) 246 else 247 ! --Non-Local case 248 ! --COMPUTATION OF exp(-beta*pot/(8*rtrotter)) 249 exppot = exp( -(ratio8*vtrial(:,1))) 250 251 ABI_ALLOCATE(gcart_loc,(3,dtset%natom)) 252 gcart_loc = rset%inf%gcart 253 ABI_ALLOCATE(projec,(0:ngfftrec(1)-1,0:ngfftrec(2)-1,0:ngfftrec(3)-1,rset%nl%lmnmax,dtset%natom)) 254 projec = zero 255 256 if(.not.(rset%tronc)) then 257 do iatom =1, dtset%natom 258 ipsp = dtset%typat(iatom) 259 do ilmn = 1,rset%nl%lmnmax 260 projec(:,:,:,ilmn,iatom) = reshape(rset%nl%projec(:,ilmn,ipsp),shape=shape(projec(:,:,:,1,1))) 261 do ii=1,3 262 projec(:,:,:,ilmn,iatom) = cshift(projec(:,:,:,ilmn,iatom),shift=ngfftrec(ii)/2-gcart_loc(ii,iatom),dim=ii) 263 end do 264 end do 265 end do 266 end if 267 end if 268 end if 269 270 !################################################################################### 271 !MAIN LOOP 272 273 rholocal = zero; alocal = zero; b2local = zero 274 ipointlocal = 1 275 276 !--Allocation: if hybrid calculation is done then I have to use 277 !balanced work on devices. 278 nullify(rho_wrk,a_wrk,b2_wrk) 279 280 if(rset%load == 1)then 281 #ifdef HAVE_GPU_CUDA 282 ABI_ALLOCATE(rho_hyb,(1:rset%GPU%par%npt)) 283 ABI_ALLOCATE(a_hyb,(0:nrec,1:rset%GPU%par%npt)) 284 ABI_ALLOCATE(b2_hyb,(0:nrec,1:rset%GPU%par%npt)) 285 rho_hyb = zero; a_hyb = zero; b2_hyb = zero 286 rho_wrk => rho_hyb 287 a_wrk => a_hyb 288 b2_wrk => b2_hyb 289 recpar = rset%GPU%par 290 #endif 291 else 292 rho_wrk => rholocal 293 a_wrk => alocal 294 b2_wrk => b2local 295 recpar = rset%par 296 end if 297 298 !#if defined HAVE_GPU_CUDA 299 !if(rset%debug)then 300 !write (std_out,*) 'rset%recGPU%nptrec ',rset%recGPU%nptrec 301 !write (std_out,*) 'rset%gpudevice ',rset%gpudevice 302 !write (std_out,*) 'rset%ngfftrec ',rset%ngfftrec(1:3) 303 !write (std_out,*) 'rset%min_nrec ',rset%min_nrec 304 !write (std_out,*) 'rset%par%ntranche ',rset%par%ntranche 305 !write (std_out,*) 'rset%par%min_pt ',rset%par%min_pt,min_pt 306 !write (std_out,*) 'rset%par%max_pt ',rset%par%max_pt,max_pt 307 !write (std_out,*) 'pt0 ',rset%par%pt0%x,rset%par%pt0%y,rset%par%pt0%z 308 !write (std_out,*) 'pt1 ',rset%par%pt1%x,rset%par%pt1%y,rset%par%pt1%z 309 !end if 310 !#endif 311 312 if(rset%gpudevice>=0) then 313 #if defined HAVE_GPU_CUDA 314 swt_tm = 1; 315 call timab(607,1,tsec2) 316 ABI_ALLOCATE(an_dev,(0:recpar%npt-1,0:nrec)) 317 ABI_ALLOCATE(bn2_dev,(0:recpar%npt-1,0:nrec)) 318 an_dev = zero 319 bn2_dev = zero; bn2_dev(:,0) = one 320 321 call cudarec( rset, exppot,an_dev,bn2_dev,& 322 & beta,trotter,tolrec,dtset%recgratio,dtset%ngfft(:3),max_rec) 323 324 max_rec = min(max_rec,nrec) 325 a_wrk(0:max_rec,1:recpar%npt) = transpose(an_dev(0:,0:max_rec)) 326 b2_wrk(0:max_rec,1:recpar%npt) = transpose(bn2_dev(0:,0:max_rec)) 327 328 ABI_DEALLOCATE(an_dev) 329 ABI_DEALLOCATE(bn2_dev) 330 call timab(607,2,tsec2) 331 332 ipointlocal = recpar%npt+1 333 334 ! !DEBUG CUDA 335 ! if(rset%debug)then 336 ! if( rset%mpi%me==0)then 337 ! do ipoint = 1,rset%par%npt,1 338 ! kk=ipoint/(product(dtset%ngfft(:2))) 339 ! jj=ipoint/dtset%ngfft(1)-kk*dtset%ngfft(2) 340 ! ii=ipoint-jj*dtset%ngfft(1)-kk*dtset%ngfft(2)*dtset%ngfft(1) 341 ! write(msg,'(a,4i8,2(a,a9,5f12.6))')& 342 ! & 'pt',ipoint,ii,jj,kk,& 343 ! & ch10,'an-gpu ',real(alocal(:4,ipoint)),& 344 ! & ch10,'b2n-gpu ',real(b2local(:4,ipoint)) 345 ! call wrtout(std_out,msg,'COLL') 346 ! end do 347 ! endif 348 ! endif 349 ! !ENDDEBUG CUDA 350 #endif 351 352 else 353 if (.not.(rset%tronc)) then 354 graou1 : do kk = recpar%pt0%z,recpar%pt1%z,dtset%recgratio 355 do jj = 0,n2-1,dtset%recgratio 356 do ii = 0,n1-1,dtset%recgratio 357 ipoint = ii+(jj+kk*n2)*n1 358 ! --Local position of atoms 359 if (ipoint<recpar%min_pt) cycle 360 ! --Computation done by that proc 361 tim_fourdp=6 362 call recursion(exppot,ii,jj,kk, & 363 & a_wrk(:,ipointlocal), & 364 & b2_wrk(:,ipointlocal), & 365 & rho_wrk(ipointlocal),& 366 & nrec, rset%efermi,tsmear,rtrotter,dim_trott, & 367 & rset%ZT_p, & 368 & tolrec,dtset%typat,rset%nl,& 369 & rset%mpi,nfftrec,ngfftrec,rset%inf,& 370 & tim_fourdp,dtset%natom,projec,1) 371 ipointlocal = ipointlocal + 1 372 ! write(std_out,*)'ipointlocal',ipoint,ipointlocal,ii,jj,kk 373 call prtwork(dtset%recgratio**3*ipointlocal*100*rset%mpi%nproc/nfftrec) 374 if(ipoint==recpar%max_pt) exit graou1 375 end do 376 end do 377 end do graou1 378 else !--We use a troncation 379 ABI_ALLOCATE(exppotloc,(0:nfftrec-1)) 380 graou2 : do kk = recpar%pt0%z,recpar%pt1%z,dtset%recgratio 381 do jj = 0,n2-1,dtset%recgratio 382 do ii = 0,n1-1,dtset%recgratio 383 ipoint = ii+(jj+kk*n2)*n1 384 if (ipoint<recpar%min_pt) cycle 385 ! computation done by that proc 386 exppotloc = zero 387 ! --Traslation to move position on the ngfftrec grid center 388 trasl = -(/ii,jj,kk/)+ngfftrec(:3)/2 389 if(rset%nl%nlpsp) then 390 do iatom=1,dtset%natom 391 ! --local position of atoms 392 gcart_loc(:,iatom) = rset%inf%gcart(:,iatom)+trasl 393 gcart_loc(:,iatom) = modulo(gcart_loc(:,iatom),(/n1,n2,n3/)) 394 ! --Traslation of non-local projectors 395 do ilmn = 1,rset%nl%lmnmax 396 projec(:,:,:,ilmn,iatom) = reshape(rset%nl%projec(:,ilmn,dtset%typat(iatom)),shape=shape(projec(:,:,:,1,1))) 397 do ii1=1,3 398 projec(:,:,:,ilmn,iatom) = eoshift(projec(:,:,:,ilmn,iatom),shift=ngfftrec(ii1)/2-gcart_loc(ii1,iatom),dim=ii1) 399 end do 400 end do 401 end do 402 end if 403 404 call reshape_pot(trasl,nfftf,nfftrec,& 405 & dtset%ngfft(:3),ngfftrec(:3),& 406 & exppot,exppotloc) 407 408 tim_fourdp=6 409 call recursion(exppotloc,ngfftrec(1)/2,ngfftrec(2)/2,ngfftrec(3)/2, & 410 & a_wrk(:,ipointlocal), & 411 & b2_wrk(:,ipointlocal), & 412 & rho_wrk(ipointlocal),& 413 & nrec, rset%efermi,tsmear,rtrotter,dim_trott, & 414 & rset%ZT_p, & 415 & tolrec,dtset%typat,rset%nl,& 416 & rset%mpi,nfftrec,ngfftrec,rset%inf,& 417 & tim_fourdp,dtset%natom,projec,1) 418 ipointlocal = ipointlocal + 1 419 call prtwork(factor*real(ipointlocal,dp)) 420 if(ipoint==recpar%max_pt) exit graou2 421 end do 422 end do 423 end do graou2 424 ABI_DEALLOCATE(exppotloc) 425 end if 426 427 end if 428 write(msg,'( a12,i12)')'ipointlocal',ipointlocal 429 call wrtout(std_out,msg,'PERS') 430 call timab(613+swt_tm,1,tsec2) !!--start time-counter: sync gpu-cpu 431 call xmpi_barrier(rset%mpi%comm_bandfft) 432 call timab(613+swt_tm,2,tsec2) !!--stop time-counter: sync gpu-cpu 433 434 !!############################################################# 435 !!--ASSIGNATION PARAMETERS TO MENAGE PARALLELISME OF TRANSGRID 436 if((rset%load==1 .or. dtset%recgratio>1))then 437 ! --Bufsize contains the values of the number of points calculated 438 ! by any proc on the coarse grid; bufsize_f on the fine grid 439 call timab(604,1,tsec2) !--start time-counter: transgrid 440 ABI_ALLOCATE(bufsize,(0:rset%mpi%nproc-1)) 441 ABI_ALLOCATE(bufdispl,(0:rset%mpi%nproc-1)) 442 bufsize = 0; 443 bufsize(rset%mpi%me) = rset%par%npt 444 call xmpi_sum(bufsize,rset%mpi%comm_bandfft,ierr) 445 446 bufdispl(0) = 0; 447 if(rset%mpi%nproc>1) bufdispl(1:) = (/(sum(bufsize(:ii)),ii=0,rset%mpi%nproc-1)/) 448 call timab(604,2,tsec2) !--stop time-counter: transgrid 449 end if 450 !!#################################################################### 451 !!--REDISTRIBUTION OF LOAD ON PROCS AFTER RECURSION IF CUDA IS USED 452 #ifdef HAVE_GPU_CUDA 453 if(rset%load==1)then 454 call timab(604,1,tsec2) !--start time-counter: transgrid 455 call xredistribute(rho_hyb,rset%GPU%par%vcount,rset%GPU%par%displs,& 456 & rholocal,bufsize,bufdispl,rset%mpi%me,& 457 & rset%mpi%nproc,& 458 & rset%mpi%comm_bandfft,ierr) 459 460 461 ABI_ALLOCATE(vcount_0,(0:rset%mpi%nproc-1)) 462 ABI_ALLOCATE(displs_0,(0:rset%mpi%nproc-1)) 463 ABI_ALLOCATE(vcount_1,(0:rset%mpi%nproc-1)) 464 ABI_ALLOCATE(displs_1,(0:rset%mpi%nproc-1)) 465 466 vcount_0 = 0 467 vcount_0(rset%mpi%me) = rset%par%npt*(nrec+1) 468 call xmpi_sum(vcount_0,rset%mpi%comm_bandfft,ierr) 469 displs_0 = 0 470 if(rset%mpi%nproc>1) displs_0(1:) = (/(sum(vcount_0(:ii)),ii=0,rset%mpi%nproc-1)/) 471 472 473 vcount_1 = 0 474 vcount_1(rset%mpi%me) = rset%GPU%par%npt*(nrec+1) 475 call xmpi_sum(vcount_1,rset%mpi%comm_bandfft,ierr) 476 displs_1 = 0 477 if(rset%mpi%nproc>1) displs_1(1:) = (/(sum(vcount_1(:ii)),ii=0,rset%mpi%nproc-1)/) 478 479 480 call xredistribute(a_hyb,vcount_1,displs_1,& 481 & alocal,vcount_0,displs_0,& 482 & rset%mpi%me,rset%mpi%nproc,& 483 & rset%mpi%comm_bandfft,ierr) 484 485 call xredistribute(b2_hyb,vcount_1,displs_1,& 486 & b2local,vcount_0,displs_0,& 487 & rset%mpi%me,rset%mpi%nproc,& 488 & rset%mpi%comm_bandfft,ierr) 489 490 nullify(rho_wrk,a_wrk,b2_wrk) 491 ABI_DEALLOCATE(rho_hyb) 492 ABI_DEALLOCATE(a_hyb) 493 ABI_DEALLOCATE(b2_hyb) 494 ABI_DEALLOCATE(vcount_0) 495 ABI_DEALLOCATE(displs_0) 496 ABI_DEALLOCATE(vcount_1) 497 ABI_DEALLOCATE(displs_1) 498 call timab(604,2,tsec2) !--start time-counter: transgrid 499 end if 500 #endif 501 502 !############################################################# 503 !--TRANSGRID FOR THE DENSITY RHO AND THE COEFFICIENTS AN AND B2N 504 if (dtset%recgratio>1) then 505 ! --variables allocation and initialisation------- 506 write (msg,'(a)')' - TRANSGRID USING -----' 507 call wrtout(std_out,msg,'COLL') 508 call timab(604,1,tsec2) !--start time-counter: transgrid 509 510 ABI_ALLOCATE(rholocal_f,(rset%pawfgr%nfft)) 511 ABI_ALLOCATE(rhogf,(2,rset%pawfgr%nfft)) 512 ABI_ALLOCATE(rhogc,(2,rset%pawfgr%nfftc)) 513 ABI_ALLOCATE(rholoc_2,(1:rset%pawfgr%nfftc)) 514 ABI_ALLOCATE(ablocal_2,(1:rset%pawfgr%nfftc,0:nrec,2)) 515 ABI_ALLOCATE(ablocal_f,(1:rset%pawfgr%nfft,0:nrec,2)) 516 ABI_ALLOCATE(ablocal_1,(1:rset%par%npt,0:nrec,2)) 517 518 call destroy_distribfft(rset%mpi%distribfft) 519 call init_distribfft(rset%mpi%distribfft,'c',rset%mpi%nproc_fft,rset%pawfgr%ngfftc(2) ,rset%pawfgr%ngfftc(3)) 520 call init_distribfft(rset%mpi%distribfft,'f',rset%mpi%nproc_fft,rset%pawfgr%ngfft(2) ,rset%pawfgr%ngfft(3)) 521 522 rholocal_f = zero; ablocal_f = zero; ablocal_2 = zero 523 524 ablocal_1(:,:,1) = transpose(alocal(:,1:rset%par%npt)) 525 ablocal_1(:,:,2) = transpose(b2local(:,1:rset%par%npt)) 526 527 if(get_K_S_G==1 .and. dtset%recgratio>1 ) then 528 ABI_ALLOCATE(aloc_copy,(0:nrec,1:rset%par%npt)) 529 ABI_ALLOCATE(b2loc_copy,(0:nrec,1:rset%par%npt)) 530 aloc_copy = alocal(:,1:rset%par%npt) 531 b2loc_copy = b2local(:,1:rset%par%npt) 532 end if 533 534 if(rset%mpi%nproc ==1) then 535 ! --SEQUENTIAL CASE-- 536 rholoc_2 = rholocal(1:rset%par%npt) 537 ablocal_2 = ablocal_1 538 ! --Transigrid: coarse->fine 539 rhogf = zero; rhogc = zero 540 call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,rholoc_2,rholocal_f) 541 do jj1 = 1,2 542 do ipoint = 0,nrec 543 rhogf = zero; rhogc = zero 544 call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,& 545 & ablocal_2(:,ipoint,jj1),ablocal_f(:,ipoint,jj1)) 546 end do 547 end do 548 ! --Assignation of the interpolated results on the fine grid-- 549 rholocal = rholocal_f 550 alocal = transpose(ablocal_f(:,:,1)) 551 b2local = transpose(ablocal_f(:,:,2)) 552 553 else 554 ! --PARALLEL CASE-- 555 rholoc_2 = zero 556 ! --Send on all procs rho,an,bn-- 557 call xmpi_allgatherv(rholocal(1:rset%par%npt),bufsize(rset%mpi%me),rholoc_2,& 558 & bufsize,bufdispl,rset%mpi%comm_bandfft,ierr) 559 do irec = 0,nrec 560 call xmpi_allgatherv(ablocal_1(1:rset%par%npt,irec,1),bufsize(rset%mpi%me),& 561 & ablocal_2(:,irec,1),bufsize,bufdispl,rset%mpi%comm_bandfft,ierr) 562 call xmpi_allgatherv(ablocal_1(1:rset%par%npt,irec,2),bufsize(rset%mpi%me),& 563 & ablocal_2(:,irec,2),bufsize,bufdispl,rset%mpi%comm_bandfft,ierr) 564 end do 565 566 567 ! --Transigrid: coarse->fine on differents procs (with respect 568 ! the number of recursion) 569 rhogf = zero; rhogc = zero 570 call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,rholoc_2,rholocal_f) 571 572 do irec = 0,2*(nrec+1)-1 573 ii1 = modulo(irec,rset%mpi%nproc) 574 jj1 = 1+modulo(irec,2) 575 kk1 = floor(irec/2.) 576 if(maxval(abs(ablocal_2(:,kk1,jj1))) > tol10 .and. rset%mpi%me == ii1) then 577 rhogf = zero; rhogc = zero 578 call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,& 579 & rhogf,ablocal_2(:,kk1,jj1),ablocal_f(:,kk1,jj1)) 580 end if 581 end do 582 583 ! --Recuperation of all interpolated results 584 ! from procs to allprocs 585 call xmpi_sum(ablocal_f,rset%mpi%comm_bandfft,ierr) 586 587 ! --Assignation the interpolated results on the fine grid 588 ! any procs to obtain the same point as in the standard recursion 589 do ii1 = 0, rset%mpi%nproc-1 590 jj1 = rset%par%displs(ii1)+1 591 if(ii1 == rset%mpi%me) then 592 alocal = transpose(ablocal_f(jj1:jj1+rset%par%vcount(ii1)-1,:,1)) 593 b2local = transpose(ablocal_f(jj1:jj1+rset%par%vcount(ii1)-1,:,2)) 594 rholocal = rholocal_f(jj1:jj1+rset%par%vcount(ii1)-1) 595 end if 596 end do 597 end if 598 ABI_DEALLOCATE(ablocal_f) 599 ABI_DEALLOCATE(ablocal_2) 600 ABI_DEALLOCATE(ablocal_1) 601 ABI_DEALLOCATE(rhogf) 602 ABI_DEALLOCATE(rholocal_f) 603 ABI_DEALLOCATE(rhogc) 604 ABI_DEALLOCATE(rholoc_2) 605 606 call timab(604,2,tsec2) !--stop time-counter: transgrid 607 else 608 write(msg,'(a)')' - TRANSGRID NOT USED --' 609 call wrtout(std_out,msg,'COLL') 610 end if 611 !!--End transgrid 612 !!############################################################### 613 !################################### 614 !--Fermi energy computation 615 !--find the good mu by imposing the electrons number 616 call fermisolverec(rset%efermi,rholocal,alocal,b2local,rset%debug,& 617 & rset%min_nrec,tsmear,trotter,nelect,tol10,100, & 618 & rset%par%ntranche,rset%mpi,inf_ucvol,& !.False. .and.& 619 & (rset%tp==2 .or. rset%tp==3) .and. trotter>1) 620 621 !################################################################# 622 !######### ENTROPY AND GRAN POTENTIAL COMPUTATION ################## 623 entropy = zero 624 gran_pot = zero 625 noentropie : if(get_K_S_G==1)then 626 entropy1 = zero; entropy2 = zero ;entropy3 = zero; entropy4 = zero 627 gran_pot1 = zero ; gran_pot2 = zero; gran_pot3 = zero; gran_pot4 = zero 628 629 ! --Seek for the min of the path integral 630 potmin = zero; nlpotmin = zero 631 if(dtset%rectesteg/=1) potmin = minval(vtrial(:,1)) 632 if(rset%nl%nlpsp) nlpotmin = minval(rset%nl%eival(:,:,:)) 633 xmax = exp(-ratio2*(potmin+nlpotmin-rset%efermi)) 634 635 dim_entro = 0; if(rset%debug) dim_entro = 4; 636 637 if(dtset%recgratio>1) then 638 ! --Recgratio>1 639 ABI_ALLOCATE(rhogf,(2,rset%pawfgr%nfft)) 640 ABI_ALLOCATE(rhogc,(2,rset%pawfgr%nfftc)) 641 ABI_ALLOCATE(entropy_v_f,(rset%pawfgr%nfft,0:4)) 642 ABI_ALLOCATE(entropy_v_c,(rset%pawfgr%nfftc,0:4)) 643 ABI_ALLOCATE(entropy_v_2,(1:rset%par%npt,0:4)) 644 ABI_ALLOCATE(gran_pot_v_f,(rset%pawfgr%nfft,0:4)) 645 ABI_ALLOCATE(gran_pot_v_c,(rset%pawfgr%nfftc,0:4)) 646 ABI_ALLOCATE(gran_pot_v_2,(1:rset%par%npt,0:4)) 647 648 entropy_v_c = zero; entropy_v_f = zero; entropy_v_2 = zero 649 gran_pot_v_c = zero; gran_pot_v_f = zero; gran_pot_v_2 = zero 650 651 652 do ipoint = 1,rset%par%npt 653 call entropyrec(exp(rset%efermi*ratio2)*aloc_copy(:,ipoint), & 654 & exp(rset%efermi*ratio1)*b2loc_copy(:,ipoint), & 655 & nrec,trotter,entropy_v_2(ipoint,0),two,& 656 & rset%debug,n_pt_integ_entropy,perc_vmin*xmax,& 657 & entropy_v_2(ipoint,1),& 658 & entropy_v_2(ipoint,2),& 659 & entropy_v_2(ipoint,3),& 660 & entropy_v_2(ipoint,4)) 661 662 call gran_potrec(exp(rset%efermi*ratio2)*aloc_copy(:,ipoint), & 663 & exp(rset%efermi*ratio1)*b2loc_copy(:,ipoint), & 664 & nrec,trotter,gran_pot_v_2(ipoint,0),two,& 665 & rset%debug,n_pt_integ_entropy,perc_vmin*xmax,& 666 & gran_pot_v_2(ipoint,1),& 667 & gran_pot_v_2(ipoint,2),& 668 & gran_pot_v_2(ipoint,3),& 669 & gran_pot_v_2(ipoint,4)) 670 end do 671 ABI_DEALLOCATE(aloc_copy) 672 ABI_DEALLOCATE(b2loc_copy) 673 674 call timab(613+swt_tm,1,tsec2) !!--start time-counter: sync gpu-cpu 675 call xmpi_barrier(rset%mpi%comm_bandfft) 676 call timab(613+swt_tm,2,tsec2) !!--stop time-counter: sync gpu-cpu 677 678 call timab(604,1,tsec2) !--start time-counter: transgrid 679 if(rset%mpi%nproc==1) then 680 entropy_v_c = entropy_v_2 681 gran_pot_v_c = gran_pot_v_2 682 end if 683 do ii1 = 0,dim_entro 684 call xmpi_allgatherv(entropy_v_2(:,ii1),bufsize(rset%mpi%me),& 685 & entropy_v_c(:,ii1),bufsize,bufdispl,& 686 & rset%mpi%comm_bandfft,ierr) 687 call xmpi_allgatherv(gran_pot_v_2(:,ii1),bufsize(rset%mpi%me),& 688 & gran_pot_v_c(:,ii1),bufsize,bufdispl,& 689 & rset%mpi%comm_bandfft,ierr) 690 691 if(maxval(abs(entropy_v_c(:,ii1))) > tol10) then 692 rhogf = zero; rhogc = zero 693 call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,& 694 & rset%pawfgr,rhogc,rhogf,entropy_v_c(:,ii1),entropy_v_f(:,ii1)) 695 end if 696 if(maxval(abs(gran_pot_v_c(:,ii1))) >tol10) then 697 rhogf = zero; rhogc = zero 698 call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,& 699 & rset%pawfgr,rhogc,rhogf,gran_pot_v_c(:,ii1),gran_pot_v_f(:,ii1)) 700 end if 701 end do 702 call timab(604,2,tsec2) !--stop time-counter: transgrid 703 704 entropy = sum(entropy_v_f(:,0)) 705 gran_pot = sum(gran_pot_v_f(:,0)) 706 707 if(rset%debug)then 708 entropy1 = sum(entropy_v_f(:,1)) 709 entropy2 = sum(entropy_v_f(:,2)) 710 entropy3 = sum(entropy_v_f(:,3)) 711 entropy4 = sum(entropy_v_f(:,4)) 712 gran_pot1 = sum(gran_pot_v_f(:,1)) 713 gran_pot2 = sum(gran_pot_v_f(:,2)) 714 gran_pot3 = sum(gran_pot_v_f(:,3)) 715 gran_pot4 = sum(gran_pot_v_f(:,4)) 716 end if 717 718 ABI_DEALLOCATE(entropy_v_f) 719 ABI_DEALLOCATE(entropy_v_c) 720 ABI_DEALLOCATE(entropy_v_2) 721 ABI_DEALLOCATE(rhogf) 722 ABI_DEALLOCATE(rhogc) 723 ABI_DEALLOCATE(gran_pot_v_f) 724 ABI_DEALLOCATE(gran_pot_v_c) 725 ABI_DEALLOCATE(gran_pot_v_2) 726 727 728 else 729 ! --Recgratio=1 730 do ipoint = 1,rset%par%ntranche 731 call entropyrec(exp(rset%efermi*ratio2)*alocal(:,ipoint), & 732 & exp(rset%efermi*ratio1)*b2local(:,ipoint), & 733 & nrec,trotter,entropylocal,two,& 734 & rset%debug,n_pt_integ_entropy,perc_vmin*xmax,& 735 & entropylocal1,entropylocal2,& 736 & entropylocal3,entropylocal4) 737 call gran_potrec(exp(rset%efermi*ratio2)*alocal(:,ipoint), & 738 & exp(rset%efermi*ratio1)*b2local(:,ipoint), & 739 & nrec,trotter,gran_pot_local,two,& !/ucvol,& 740 & rset%debug,n_pt_integ_entropy,perc_vmin*xmax,& 741 & gran_pot_local1,gran_pot_local2,& 742 & gran_pot_local3,gran_pot_local4) 743 744 entropy = entropy + entropylocal 745 gran_pot = gran_pot + gran_pot_local 746 if(rset%debug)then 747 entropy1 = entropy1 + entropylocal1 748 entropy2 = entropy2 + entropylocal2 749 entropy3 = entropy3 + entropylocal3 750 entropy4 = entropy4 + entropylocal4 751 gran_pot1 = gran_pot1 + gran_pot_local1 752 gran_pot2 = gran_pot2 + gran_pot_local2 753 gran_pot3 = gran_pot3 + gran_pot_local3 754 gran_pot4 = gran_pot4 + gran_pot_local4 755 end if 756 757 end do 758 759 call xmpi_sum(entropy,rset%mpi%comm_bandfft ,ierr) 760 call xmpi_sum(gran_pot,rset%mpi%comm_bandfft ,ierr) 761 if(rset%debug)then 762 call xmpi_sum(entropy1,rset%mpi%comm_bandfft ,ierr) 763 call xmpi_sum(entropy2,rset%mpi%comm_bandfft ,ierr) 764 call xmpi_sum(entropy3,rset%mpi%comm_bandfft ,ierr) 765 call xmpi_sum(entropy4,rset%mpi%comm_bandfft ,ierr) 766 call xmpi_sum(gran_pot1,rset%mpi%comm_bandfft ,ierr) 767 call xmpi_sum(gran_pot2,rset%mpi%comm_bandfft ,ierr) 768 call xmpi_sum(gran_pot3,rset%mpi%comm_bandfft ,ierr) 769 call xmpi_sum(gran_pot4,rset%mpi%comm_bandfft ,ierr) 770 end if 771 end if 772 773 if(rset%debug)then 774 write(msg,'(2(2a,4(2a,es11.4,a)))')& 775 & ' --------------------------' ,ch10, & 776 & ' entropy, horiz path=',' ',entropy1,ch10, & 777 & ' entropy, xmax path=',' ',entropy2,ch10, & 778 & ' entropy, xmin path=',' ',entropy3,ch10, & 779 & ' entropy, zero path=',' ',entropy4,ch10, & 780 & ' --------------------------' ,ch10, & 781 & ' -omega/T, horiz path=',' ',gran_pot1,ch10, & 782 & ' -omega/T, xmax path=',' ',gran_pot2,ch10, & 783 & ' -omega/T, xmin path=',' ',gran_pot3,ch10, & 784 & ' -omega/T, zero path=',' ',gran_pot4,ch10 785 call wrtout(std_out,msg,'COLL') 786 end if 787 788 e_eigenvalues=tsmear*(entropy-gran_pot) + rset%efermi*nelect 789 ! --In reality gran_pot is not the gran potential but the 790 ! potential omega=-PV (Landau-potential or grand-potential) 791 ! divided by -T so the internal energy 792 ! U:=e_eigenvalues= TS+omega+muN = ST-T*sum(ln(1-n))+muN = 793 ! T(S-gran_pot)+muN 794 795 796 if(rset%nl%nlpsp) then 797 call nlenergyrec(rset,enl,exppot,dtset%ngfft,dtset%natom,& 798 & dtset%typat,tsmear,trotter,tolrec) 799 end if 800 end if noentropie 801 !##### END ENTROPY AND GRAN POTENTIAL COMPUTATION ################## 802 !################################################################# 803 804 !if(associated(projec)) 805 if(rset%nl%nlpsp) then 806 ABI_DEALLOCATE(projec) 807 end if 808 if(associated(gcart_loc)) then 809 ABI_DEALLOCATE(gcart_loc) 810 end if 811 if((dtset%recgratio/=1 .or. rset%load==1)) then 812 ABI_DEALLOCATE(bufdispl) 813 ABI_DEALLOCATE(bufsize) 814 end if 815 !------------------------------------------------------------------ 816 !--Check if the convergence is reached for rho 817 drho = maxval(abs(rhor(min_pt:max_pt,1)-rholocal(:))) 818 drhomax = drho 819 call xmpi_max(drho,drhomax,rset%mpi%comm_bandfft,ierr) 820 821 !write(std_out,*)'drhomax,toldrho',drhomax,toldrho 822 if(drhomax<toldrho)then 823 rset%quitrec = rset%quitrec+1 824 else 825 rset%quitrec = 0 826 end if 827 828 !------------------------------------------------------------------- 829 !--Density on all procs 830 rhor(min_pt:max_pt,1) = rholocal(:) 831 if(rset%mpi%nproc /= 1)then 832 call xmpi_allgatherv(rholocal,rset%par%ntranche,rhor(:,1),& 833 & rset%par%vcount,rset%par%displs,& 834 & rset%mpi%comm_band,ierr) 835 end if 836 837 !-------------------------------------------------------------------- 838 !--2nd EKIN CALCULATION: this method is used 839 noekin2 : if(get_K_S_G==1)then 840 intrhov = (inf_ucvol)*sum(rholocal*vtrial(min_pt:max_pt,1)) 841 call xmpi_sum(intrhov,rset%mpi%comm_bandfft ,ierr) 842 843 ek = e_eigenvalues-intrhov-enl 844 845 846 if(rset%debug) then 847 write (msg,'(2a,3f15.10,2a,3f15.10,2a,f15.10,a)') ch10,& 848 & ' ek,int(rho*V),ek+int(rho*V) ', ek, intrhov, ek+ intrhov,ch10, & 849 & ' kT*S, kT*sum(ln(...)), diff ', tsmear*entropy, tsmear*gran_pot, tsmear*(entropy-gran_pot),ch10, & 850 & ' kT(S-sum(ln(...)))+mu*nelect', tsmear*(entropy-gran_pot)+rset%efermi*nelect,ch10 851 call wrtout(std_out,msg,'COLL') 852 end if 853 854 855 end if noekin2 856 !-------------------------------------------------------------------- 857 fermie = rset%efermi 858 859 !-------------------------------------------------------- 860 !!--At the first step to find the max number of recursion 861 !! needed to convergence, then redefine nrec. 862 if(initialized==0 .and. dtset%ntime>0) then 863 call Calcnrec(rset,b2local) 864 end if 865 866 !-------------------------------------------------------- 867 call destroy_distribfft(rset%mpi%distribfft) 868 call init_distribfft(rset%mpi%distribfft,'c',rset%mpi%nproc_fft,rset%ngfftrec(2),rset%ngfftrec(3)) 869 call init_distribfft(rset%mpi%distribfft,'f',rset%mpi%nproc_fft,dtset%ngfft(2),dtset%ngfft(3)) 870 871 !--Printing results 872 write(msg,'(3a,f15.10)')& 873 & ' -- Results: --------------------------------------',ch10,& 874 & ' mu =',rset%efermi 875 call wrtout(std_out,msg,'COLL') 876 if(get_K_S_G==1)then 877 write(msg,'(a,f15.10,6(2a,f15.10))')& 878 & ' potmin =',potmin,ch10,& 879 & ' <V_eff> =',intrhov,ch10,& 880 & ' entropy =',entropy,ch10,& 881 & ' -omega/T =',gran_pot,ch10,& 882 & ' eigenvalues =',e_eigenvalues,ch10,& 883 & ' kinetic =',ek,ch10,& 884 & ' non-loc ene =',enl 885 call wrtout(std_out,msg,'COLL') 886 end if 887 write(msg,'(a,50a)')' ',('-',ii=1,50) 888 call wrtout(std_out,msg,'COLL') 889 !write(std_out,*)'is the pressure ',gran_pot*tsmear/(rset%inf%ucvol*real(nfftrec,dp)) 890 891 !--Structured debugging : if rset%debug=T, stop here. 892 if(.false.)then !(rset%debug) 893 call wrtout(std_out,' rhor ','PERS') 894 write(std_out,*)rhor(:,1) 895 call wrtout(std_out,' ','COLL') 896 write(msg,'(a,2d10.3)')' temps recursion ',tsec 897 call wrtout(std_out,msg,'COLL') 898 write(msg,'(a,l1,a)') ' vtorhorec : rset%debug=-',rset%debug,', debugging mode => stop ' 899 MSG_ERROR(msg) 900 end if 901 902 call timab(600,2,tsec2) 903 call timab(21,2,tsec) 904 905 call symrhg(1,gprimd,irrzon,rset%mpi,nfftf,& 906 & dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3),dtset%ngfft,dtset%nspden,& 907 & dtset%nsppol,dtset%nsym,dtset%paral_kgb,& 908 & phnons,rhog,rhor,rprimd,dtset%symafm,dtset%symrel) 909 910 end subroutine vtorhorec