TABLE OF CONTENTS


ABINIT/vtorhorec [ Functions ]

[ Top ] [ Functions ]

NAME

 vtorhorec

FUNCTION

 This routine computes the new density from a fixed potential (vtrial)
 using a recursion method

COPYRIGHT

 Copyright (C) 2008-2018 ABINIT group (SLeroux,MMancini).
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

  deltastep= if 0 the iteration step is equal to dtset%nstep
  initialized= if 0 the initialization of the gstate run is not yet finished
  operator (ground-state symmetries)
  dtset <type(dataset_type)>=all input variables for this dataset
  irrzon(nfft**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data
  nfftf=(effective) number of FFT grid points (for this processor)
  nspden=number of spin-density components
  nsppol=1 for unpolarized, 2 for spin-polarized
  nsym=number of symmetry elements in space group
  phnons(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases
  vtrial(nfft,nspden)=INPUT Vtrial(r).
  rset <type(recursion_type)> all variables for recursion
  rprimd(3,3)=dimensional primitive translations in real space (bohr)
  gprimd(3,3)=dimensional primitive translations in reciprocal space

OUTPUT

  ek=kinetic energy part of total energy.
  enl=nonlocal pseudopotential part of total energy.
  entropy=entropy due to the occupation number smearing (if metal)
  e_eigenvalues=Sum of the eigenvalues - Band energy (Hartree)
  fermie=fermi energy (Hartree)
  grnl(3*natom)=stores grads of nonlocal energy wrt length scales
   (3x3 tensor) and grads wrt atomic coordinates (3*natom)

SIDE EFFECTS

  rhog(2,nfft)=array for Fourier transform of electron density
  rhor(nfft,nspden)=array for electron density in electrons/bohr**3.
  rset%efermi= fermi energy

PARENTS

      scfcv

CHILDREN

      calcnrec,cudarec,destroy_distribfft,entropyrec,fermisolverec
      gran_potrec,init_distribfft,nlenergyrec,prtwork,recursion,reshape_pot
      symrhg,timab,transgrid,wrtout,xmpi_allgatherv,xmpi_barrier,xmpi_max
      xmpi_sum,xredistribute

NOTES

  at this time :
       - grnl in not implemented
       - symetrie usage not implemented (irrzon not used and nsym should be 1)
       - spin-polarized not implemented (nsppol must be 1, nspden ?)
       - phnons used only in symrhg
       - need a rectangular box (ngfft(1)=ngfft(2)=ngfft(3))

SOURCE

 66 #if defined HAVE_CONFIG_H
 67 #include "config.h"
 68 #endif
 69 
 70 #include "abi_common.h"
 71 
 72 subroutine vtorhorec(dtset,&
 73 &  ek,enl,entropy,e_eigenvalues,fermie,&
 74 &  grnl,initialized,irrzon,nfftf,phnons,&
 75 &  rhog, rhor, vtrial,rset,deltastep,rprimd,gprimd)
 76 
 77  use defs_basis
 78  use defs_abitypes
 79  use defs_rectypes
 80  use m_xmpi
 81  use m_pretty_rec
 82  use m_errors
 83  use m_profiling_abi
 84 
 85  use m_rec,            only : Calcnrec
 86  use m_rec_tools,      only : reshape_pot
 87 #ifdef HAVE_GPU_CUDA
 88  use m_initcuda,       only : cudap
 89  use m_hidecudarec
 90  use m_xredistribute
 91 #endif
 92 
 93 !This section has been created automatically by the script Abilint (TD).
 94 !Do not modify the following lines by hand.
 95 #undef ABI_FUNC
 96 #define ABI_FUNC 'vtorhorec'
 97  use interfaces_14_hidewrite
 98  use interfaces_18_timing
 99  use interfaces_65_paw
100  use interfaces_67_common
101  use interfaces_68_recursion, except_this_one => vtorhorec
102 !End of the abilint section
103 
104  implicit none
105 
106 !Arguments -------------------------------
107 !scalars
108  integer,intent(in) :: initialized
109  integer,intent(in) :: nfftf,deltastep
110  real(dp),intent(out) :: e_eigenvalues,ek,enl,entropy,fermie
111  type(dataset_type),intent(in) :: dtset
112  type(recursion_type),intent(inout) :: rset
113 !arrays
114  integer, intent(in) :: irrzon(:,:,:)
115  real(dp),intent(in) :: rprimd(3,3),gprimd(3,3)
116  real(dp),intent(in) :: phnons(:,:,:)
117  real(dp),intent(in) :: vtrial(:,:)
118  real(dp),intent(inout) :: rhog(:,:)
119  real(dp),intent(out) :: grnl(:)  !vz_i
120  real(dp),intent(inout) :: rhor(:,:) !vz_i
121 
122 !Local variables-------------------------------
123 !scalars
124  integer :: nfftrec, dim_entro,ii1,jj1,kk1
125  integer :: ierr,ii,ilmn,ipsp,dim_trott
126  integer :: ipoint,ipointlocal,jj,kk,irec
127  integer :: n1,n2,n3,n4,n_pt_integ_entropy
128  integer :: nrec,iatom,min_pt,max_pt,swt_tm
129  integer ::  get_K_S_G
130  integer :: tim_fourdp,trotter
131  real(dp),parameter :: perc_vmin=one
132  real(dp) :: beta,drho,drhomax
133  real(dp) :: entropy1,entropy2,entropy3,entropy4
134  real(dp) :: entropylocal,entropylocal1,entropylocal2
135  real(dp) :: entropylocal3,entropylocal4,gran_pot,gran_pot1,gran_pot2
136  real(dp) :: gran_pot3,gran_pot4,gran_pot_local,gran_pot_local1
137  real(dp) :: gran_pot_local2,gran_pot_local3,gran_pot_local4
138  real(dp) :: inf_ucvol,intrhov,factor
139  real(dp) :: nelect,potmin,rtrotter,toldrho,tolrec,tsmear
140  real(dp) :: xmax,nlpotmin,ratio1,ratio2,ratio4,ratio8
141  type(recparall_type) :: recpar
142  character(len=500) :: msg
143  !arrays
144  integer :: ngfftrec(18),trasl(3)
145  integer,pointer :: gcart_loc(:,:)
146  integer,allocatable :: bufsize(:), bufdispl(:)
147  real(dp) :: tsec(2),tsec2(2)
148  real(dp) :: exppot(0:dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3)-1)
149  real(dp),target :: rholocal(1:rset%par%ntranche)
150  real(dp),target :: alocal(0:rset%min_nrec,1:rset%par%ntranche)
151  real(dp),target :: b2local(0:rset%min_nrec,1:rset%par%ntranche)
152  real(dp),pointer :: rho_wrk(:)
153  real(dp),pointer :: a_wrk(:,:),b2_wrk(:,:)
154  real(dp),allocatable :: rholocal_f(:), rholoc_2(:)
155  real(dp),allocatable :: rhogf(:,:),rhogc(:,:),aloc_copy(:,:),b2loc_copy(:,:)
156  real(dp),allocatable :: gran_pot_v_f(:,:),gran_pot_v_c(:,:),gran_pot_v_2(:,:)
157  real(dp),allocatable :: entropy_v_f(:,:),entropy_v_c(:,:),entropy_v_2(:,:)
158  real(dp),allocatable :: ablocal_1(:,:,:),ablocal_2(:,:,:),ablocal_f(:,:,:)
159  real(dp),allocatable :: exppotloc(:)
160  real(dp),allocatable :: projec(:,:,:,:,:)
161 #if defined HAVE_GPU_CUDA
162  integer :: max_rec
163  integer,allocatable :: vcount_0(:), displs_0(:)
164  integer,allocatable :: vcount_1(:), displs_1(:)
165  real(dp),allocatable,target :: rho_hyb(:)
166  real(dp),allocatable,target :: a_hyb(:,:),b2_hyb(:,:)
167  real(cudap),allocatable :: an_dev(:,:)
168  real(cudap),allocatable :: bn2_dev(:,:)
169 #endif
170 
171 ! *********************************************************************
172  if(rset%debug)then
173    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' vtorhorec : enter '
174    call wrtout(std_out,msg,'PERS')
175  end if
176 
177  call timab(21,1,tsec)
178  call timab(600,1,tsec2)
179 !##################################################################################################
180 !!--Initalization in the FIRST time in VTORHOREC is made in SCFCV by FIRST_REC routine
181 !!--Parameters for the recursion method AND  Initialisation
182 
183  trotter = dtset%recptrott  !--trotter parameter
184  nelect  = dtset%nelect     !--number of electrons
185 
186  toldrho = dtset%rectolden  !--tollerance for density
187  tolrec  = toldrho*1.d-2    !--tollerance for local density
188 
189  tsmear  = dtset%tsmear     !--temperature
190  beta    = one/tsmear       !--inverse of temperature
191 
192  factor = real(dtset%recgratio**3*100*rset%mpi%nproc,dp)/real(nfftf,dp)
193 
194 !--Assignation of the rset variable:
195  nrec       = rset%min_nrec
196  nfftrec    = rset%nfftrec
197  ngfftrec   = rset%ngfftrec
198  inf_ucvol  = rset%inf%ucvol
199 
200  min_pt = rset%par%displs(rset%mpi%me)+1
201  max_pt = min_pt+rset%par%vcount(rset%mpi%me)-1
202 
203 !--In the last self-constistent loop or if density is converged the
204 !thermodynamics quantities are calculated
205  get_K_S_G = 0; if(deltastep==0 .or. rset%quitrec/=0) get_K_S_G = 1;
206 
207 !--Rewriting the trotter parameter
208  rtrotter  = max(half,real(trotter,dp))
209  dim_trott = max(0,2*trotter-1)
210 
211 !--Variables Optimisation
212  ratio1 = beta/rtrotter
213  ratio2 = ratio1/two
214  ratio4 = ratio1/four
215  ratio8 = ratio1/eight
216 
217 !--Integration points for entropy
218  n_pt_integ_entropy = max(25,dtset%recnpath)
219 
220 !-- energies non-local: at day not implemented
221  enl = zero
222  grnl = zero
223 !jmb
224  ek = zero
225 
226 !--only a copy of ngfft(1:3) and nfft  (to purge!!)
227  n1 = dtset%ngfft(1) ; n2 = dtset%ngfft(2) ; n3 = dtset%ngfft(3)
228  n4 = n3
229 
230 !--time switch to measure gpu-cpu syncrhonisation
231  swt_tm = 0 ; !no gpu initally
232 
233  exppot = zero
234  nullify(gcart_loc)
235 
236  if(dtset%rectesteg==1)then
237 !  --Free electron gas case
238    exppot = one
239  else
240    if(.not.(rset%nl%nlpsp)) then
241 !    --Local case
242 !    --COMPUTATION OF exp( -beta*pot/(4*rtrotter))
243      ABI_ALLOCATE(gcart_loc,(0,0))
244      gcart_loc = 0
245      exppot = exp( -(ratio4*vtrial(:,1)))
246    else
247 !    --Non-Local case
248 !    --COMPUTATION OF exp(-beta*pot/(8*rtrotter))
249      exppot = exp( -(ratio8*vtrial(:,1)))
250 
251      ABI_ALLOCATE(gcart_loc,(3,dtset%natom))
252      gcart_loc = rset%inf%gcart
253      ABI_ALLOCATE(projec,(0:ngfftrec(1)-1,0:ngfftrec(2)-1,0:ngfftrec(3)-1,rset%nl%lmnmax,dtset%natom))
254      projec = zero
255 
256      if(.not.(rset%tronc)) then
257        do iatom =1, dtset%natom
258          ipsp = dtset%typat(iatom)
259          do ilmn = 1,rset%nl%lmnmax
260            projec(:,:,:,ilmn,iatom) = reshape(rset%nl%projec(:,ilmn,ipsp),shape=shape(projec(:,:,:,1,1)))
261            do ii=1,3
262              projec(:,:,:,ilmn,iatom) = cshift(projec(:,:,:,ilmn,iatom),shift=ngfftrec(ii)/2-gcart_loc(ii,iatom),dim=ii)
263            end do
264          end do
265        end do
266      end if
267    end if
268  end if
269 
270 !###################################################################################
271 !MAIN LOOP
272 
273  rholocal = zero; alocal = zero; b2local = zero
274  ipointlocal = 1
275 
276 !--Allocation: if hybrid calculation is done then I have to use
277 !balanced work on devices.
278  nullify(rho_wrk,a_wrk,b2_wrk)
279 
280  if(rset%load == 1)then
281 #ifdef HAVE_GPU_CUDA
282    ABI_ALLOCATE(rho_hyb,(1:rset%GPU%par%npt))
283    ABI_ALLOCATE(a_hyb,(0:nrec,1:rset%GPU%par%npt))
284    ABI_ALLOCATE(b2_hyb,(0:nrec,1:rset%GPU%par%npt))
285    rho_hyb = zero; a_hyb = zero; b2_hyb = zero
286    rho_wrk => rho_hyb
287    a_wrk   => a_hyb
288    b2_wrk  => b2_hyb
289    recpar  = rset%GPU%par
290 #endif
291  else
292    rho_wrk => rholocal
293    a_wrk   => alocal
294    b2_wrk  => b2local
295    recpar  = rset%par
296  end if
297 
298 !#if defined HAVE_GPU_CUDA
299 !if(rset%debug)then
300 !write (std_out,*) 'rset%recGPU%nptrec ',rset%recGPU%nptrec
301 !write (std_out,*) 'rset%gpudevice ',rset%gpudevice
302 !write (std_out,*) 'rset%ngfftrec ',rset%ngfftrec(1:3)
303 !write (std_out,*) 'rset%min_nrec ',rset%min_nrec
304 !write (std_out,*) 'rset%par%ntranche ',rset%par%ntranche
305 !write (std_out,*) 'rset%par%min_pt ',rset%par%min_pt,min_pt
306 !write (std_out,*) 'rset%par%max_pt ',rset%par%max_pt,max_pt
307 !write (std_out,*) 'pt0 ',rset%par%pt0%x,rset%par%pt0%y,rset%par%pt0%z
308 !write (std_out,*) 'pt1 ',rset%par%pt1%x,rset%par%pt1%y,rset%par%pt1%z
309 !end if
310 !#endif
311 
312  if(rset%gpudevice>=0) then
313 #if defined HAVE_GPU_CUDA
314    swt_tm = 1;
315    call timab(607,1,tsec2)
316    ABI_ALLOCATE(an_dev,(0:recpar%npt-1,0:nrec))
317    ABI_ALLOCATE(bn2_dev,(0:recpar%npt-1,0:nrec))
318    an_dev = zero
319    bn2_dev = zero; bn2_dev(:,0) = one
320 
321    call cudarec( rset, exppot,an_dev,bn2_dev,&
322 &   beta,trotter,tolrec,dtset%recgratio,dtset%ngfft(:3),max_rec)
323    
324    max_rec = min(max_rec,nrec)
325    a_wrk(0:max_rec,1:recpar%npt)  = transpose(an_dev(0:,0:max_rec))
326    b2_wrk(0:max_rec,1:recpar%npt) = transpose(bn2_dev(0:,0:max_rec))
327    
328    ABI_DEALLOCATE(an_dev)
329    ABI_DEALLOCATE(bn2_dev)
330    call timab(607,2,tsec2)
331 
332    ipointlocal = recpar%npt+1
333 
334 !  !DEBUG CUDA
335 !  if(rset%debug)then
336 !  if( rset%mpi%me==0)then
337 !  do ipoint = 1,rset%par%npt,1
338 !  kk=ipoint/(product(dtset%ngfft(:2)))
339 !  jj=ipoint/dtset%ngfft(1)-kk*dtset%ngfft(2)
340 !  ii=ipoint-jj*dtset%ngfft(1)-kk*dtset%ngfft(2)*dtset%ngfft(1)
341 !  write(msg,'(a,4i8,2(a,a9,5f12.6))')&
342 !  & 'pt',ipoint,ii,jj,kk,&
343 !  & ch10,'an-gpu   ',real(alocal(:4,ipoint)),&
344 !  & ch10,'b2n-gpu  ',real(b2local(:4,ipoint))
345 !  call wrtout(std_out,msg,'COLL')
346 !  end do
347 !  endif
348 !  endif
349 !  !ENDDEBUG CUDA
350 #endif
351 
352  else
353    if (.not.(rset%tronc)) then
354      graou1 : do kk = recpar%pt0%z,recpar%pt1%z,dtset%recgratio
355        do jj = 0,n2-1,dtset%recgratio
356          do ii = 0,n1-1,dtset%recgratio
357            ipoint = ii+(jj+kk*n2)*n1
358 !          --Local position of atoms
359            if (ipoint<recpar%min_pt) cycle
360 !          --Computation done by that proc
361            tim_fourdp=6
362            call recursion(exppot,ii,jj,kk, &
363 &           a_wrk(:,ipointlocal), &
364 &           b2_wrk(:,ipointlocal), &
365 &           rho_wrk(ipointlocal),&
366 &           nrec, rset%efermi,tsmear,rtrotter,dim_trott, &
367 &           rset%ZT_p, &
368 &           tolrec,dtset%typat,rset%nl,&
369 &           rset%mpi,nfftrec,ngfftrec,rset%inf,&
370 &           tim_fourdp,dtset%natom,projec,1)
371            ipointlocal = ipointlocal + 1
372 !          write(std_out,*)'ipointlocal',ipoint,ipointlocal,ii,jj,kk
373            call prtwork(dtset%recgratio**3*ipointlocal*100*rset%mpi%nproc/nfftrec)
374            if(ipoint==recpar%max_pt) exit graou1
375          end do
376        end do
377      end do graou1
378    else !--We use a troncation
379      ABI_ALLOCATE(exppotloc,(0:nfftrec-1))
380      graou2 : do kk = recpar%pt0%z,recpar%pt1%z,dtset%recgratio
381        do jj = 0,n2-1,dtset%recgratio
382          do ii = 0,n1-1,dtset%recgratio
383            ipoint = ii+(jj+kk*n2)*n1
384            if (ipoint<recpar%min_pt) cycle
385 !          computation done by that proc
386            exppotloc = zero
387 !          --Traslation to move position on the ngfftrec grid center
388            trasl = -(/ii,jj,kk/)+ngfftrec(:3)/2
389            if(rset%nl%nlpsp) then
390              do iatom=1,dtset%natom
391 !              --local position of atoms
392                gcart_loc(:,iatom) = rset%inf%gcart(:,iatom)+trasl
393                gcart_loc(:,iatom) = modulo(gcart_loc(:,iatom),(/n1,n2,n3/))
394 !              --Traslation of non-local projectors
395                do ilmn = 1,rset%nl%lmnmax
396                  projec(:,:,:,ilmn,iatom) = reshape(rset%nl%projec(:,ilmn,dtset%typat(iatom)),shape=shape(projec(:,:,:,1,1)))
397                  do ii1=1,3
398                    projec(:,:,:,ilmn,iatom) = eoshift(projec(:,:,:,ilmn,iatom),shift=ngfftrec(ii1)/2-gcart_loc(ii1,iatom),dim=ii1)
399                  end do
400                end do
401              end do
402            end if
403 
404            call reshape_pot(trasl,nfftf,nfftrec,&
405 &           dtset%ngfft(:3),ngfftrec(:3),&
406 &           exppot,exppotloc)
407 
408            tim_fourdp=6
409            call recursion(exppotloc,ngfftrec(1)/2,ngfftrec(2)/2,ngfftrec(3)/2, &
410 &           a_wrk(:,ipointlocal), &
411 &           b2_wrk(:,ipointlocal), &
412 &           rho_wrk(ipointlocal),&
413 &           nrec, rset%efermi,tsmear,rtrotter,dim_trott, &
414 &           rset%ZT_p, &
415 &           tolrec,dtset%typat,rset%nl,&
416 &           rset%mpi,nfftrec,ngfftrec,rset%inf,&
417 &           tim_fourdp,dtset%natom,projec,1)
418            ipointlocal = ipointlocal + 1
419            call prtwork(factor*real(ipointlocal,dp))
420            if(ipoint==recpar%max_pt) exit graou2
421          end do
422        end do
423      end do graou2
424      ABI_DEALLOCATE(exppotloc)
425    end if
426    
427  end if
428  write(msg,'( a12,i12)')'ipointlocal',ipointlocal
429  call wrtout(std_out,msg,'PERS')
430  call timab(613+swt_tm,1,tsec2)  !!--start time-counter: sync gpu-cpu
431  call xmpi_barrier(rset%mpi%comm_bandfft)
432  call timab(613+swt_tm,2,tsec2)  !!--stop time-counter: sync gpu-cpu
433 
434 !!#############################################################
435 !!--ASSIGNATION PARAMETERS TO MENAGE PARALLELISME OF TRANSGRID
436  if((rset%load==1 .or. dtset%recgratio>1))then
437 !  --Bufsize contains the values of the number of points calculated
438 !  by any proc on the coarse grid; bufsize_f on the fine grid
439    call timab(604,1,tsec2) !--start time-counter: transgrid
440    ABI_ALLOCATE(bufsize,(0:rset%mpi%nproc-1))
441    ABI_ALLOCATE(bufdispl,(0:rset%mpi%nproc-1))
442    bufsize = 0;
443    bufsize(rset%mpi%me) = rset%par%npt
444    call xmpi_sum(bufsize,rset%mpi%comm_bandfft,ierr)
445 
446    bufdispl(0) = 0;
447    if(rset%mpi%nproc>1) bufdispl(1:) = (/(sum(bufsize(:ii)),ii=0,rset%mpi%nproc-1)/)
448    call timab(604,2,tsec2) !--stop time-counter: transgrid
449  end if
450 !!####################################################################
451 !!--REDISTRIBUTION OF LOAD ON PROCS AFTER RECURSION IF CUDA IS USED
452 #ifdef HAVE_GPU_CUDA
453  if(rset%load==1)then
454    call timab(604,1,tsec2) !--start time-counter: transgrid
455    call xredistribute(rho_hyb,rset%GPU%par%vcount,rset%GPU%par%displs,&
456 &   rholocal,bufsize,bufdispl,rset%mpi%me,&
457 &   rset%mpi%nproc,&
458 &   rset%mpi%comm_bandfft,ierr)
459 
460 
461    ABI_ALLOCATE(vcount_0,(0:rset%mpi%nproc-1))
462    ABI_ALLOCATE(displs_0,(0:rset%mpi%nproc-1))
463    ABI_ALLOCATE(vcount_1,(0:rset%mpi%nproc-1))
464    ABI_ALLOCATE(displs_1,(0:rset%mpi%nproc-1))
465 
466    vcount_0 = 0
467    vcount_0(rset%mpi%me) = rset%par%npt*(nrec+1)
468    call xmpi_sum(vcount_0,rset%mpi%comm_bandfft,ierr)
469    displs_0 = 0
470    if(rset%mpi%nproc>1) displs_0(1:) = (/(sum(vcount_0(:ii)),ii=0,rset%mpi%nproc-1)/)
471 
472 
473    vcount_1 = 0
474    vcount_1(rset%mpi%me) = rset%GPU%par%npt*(nrec+1)
475    call xmpi_sum(vcount_1,rset%mpi%comm_bandfft,ierr)
476    displs_1 = 0
477    if(rset%mpi%nproc>1) displs_1(1:) = (/(sum(vcount_1(:ii)),ii=0,rset%mpi%nproc-1)/)
478 
479 
480    call xredistribute(a_hyb,vcount_1,displs_1,&
481 &   alocal,vcount_0,displs_0,&
482 &   rset%mpi%me,rset%mpi%nproc,& 
483 &   rset%mpi%comm_bandfft,ierr)
484 
485    call xredistribute(b2_hyb,vcount_1,displs_1,&
486 &   b2local,vcount_0,displs_0,&
487 &   rset%mpi%me,rset%mpi%nproc,& 
488 &   rset%mpi%comm_bandfft,ierr)
489 
490    nullify(rho_wrk,a_wrk,b2_wrk)
491    ABI_DEALLOCATE(rho_hyb)
492    ABI_DEALLOCATE(a_hyb)
493    ABI_DEALLOCATE(b2_hyb)
494    ABI_DEALLOCATE(vcount_0)
495    ABI_DEALLOCATE(displs_0)
496    ABI_DEALLOCATE(vcount_1)
497    ABI_DEALLOCATE(displs_1)
498    call timab(604,2,tsec2) !--start time-counter: transgrid
499  end if
500 #endif
501 
502 !#############################################################
503 !--TRANSGRID FOR THE DENSITY RHO AND THE COEFFICIENTS AN AND B2N
504  if (dtset%recgratio>1) then
505 !  --variables allocation and initialisation-------
506    write (msg,'(a)')' - TRANSGRID USING -----'
507    call wrtout(std_out,msg,'COLL')
508    call timab(604,1,tsec2) !--start time-counter: transgrid
509 
510    ABI_ALLOCATE(rholocal_f,(rset%pawfgr%nfft))
511    ABI_ALLOCATE(rhogf,(2,rset%pawfgr%nfft))
512    ABI_ALLOCATE(rhogc,(2,rset%pawfgr%nfftc))
513    ABI_ALLOCATE(rholoc_2,(1:rset%pawfgr%nfftc))
514    ABI_ALLOCATE(ablocal_2,(1:rset%pawfgr%nfftc,0:nrec,2))
515    ABI_ALLOCATE(ablocal_f,(1:rset%pawfgr%nfft,0:nrec,2))
516    ABI_ALLOCATE(ablocal_1,(1:rset%par%npt,0:nrec,2))
517 
518    call destroy_distribfft(rset%mpi%distribfft)
519    call init_distribfft(rset%mpi%distribfft,'c',rset%mpi%nproc_fft,rset%pawfgr%ngfftc(2) ,rset%pawfgr%ngfftc(3))
520    call init_distribfft(rset%mpi%distribfft,'f',rset%mpi%nproc_fft,rset%pawfgr%ngfft(2) ,rset%pawfgr%ngfft(3))
521 
522    rholocal_f = zero; ablocal_f = zero; ablocal_2 = zero
523 
524    ablocal_1(:,:,1) = transpose(alocal(:,1:rset%par%npt))
525    ablocal_1(:,:,2) = transpose(b2local(:,1:rset%par%npt))
526 
527    if(get_K_S_G==1 .and. dtset%recgratio>1 ) then
528      ABI_ALLOCATE(aloc_copy,(0:nrec,1:rset%par%npt))
529      ABI_ALLOCATE(b2loc_copy,(0:nrec,1:rset%par%npt))
530      aloc_copy = alocal(:,1:rset%par%npt)
531      b2loc_copy = b2local(:,1:rset%par%npt)
532    end if
533 
534    if(rset%mpi%nproc ==1) then
535 !    --SEQUENTIAL CASE--
536      rholoc_2 = rholocal(1:rset%par%npt)
537      ablocal_2 = ablocal_1
538 !    --Transigrid: coarse->fine
539      rhogf = zero; rhogc = zero
540      call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,rholoc_2,rholocal_f)
541      do jj1 = 1,2
542        do ipoint = 0,nrec
543          rhogf = zero; rhogc = zero
544          call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,&
545 &         ablocal_2(:,ipoint,jj1),ablocal_f(:,ipoint,jj1))
546        end do
547      end do
548 !    --Assignation of the interpolated results on the fine grid--
549      rholocal = rholocal_f
550      alocal = transpose(ablocal_f(:,:,1))
551      b2local = transpose(ablocal_f(:,:,2))
552 
553    else
554 !    --PARALLEL CASE--
555      rholoc_2 = zero
556 !    --Send on all procs rho,an,bn--
557      call xmpi_allgatherv(rholocal(1:rset%par%npt),bufsize(rset%mpi%me),rholoc_2,&
558 &     bufsize,bufdispl,rset%mpi%comm_bandfft,ierr)
559      do irec = 0,nrec
560        call xmpi_allgatherv(ablocal_1(1:rset%par%npt,irec,1),bufsize(rset%mpi%me),&
561 &       ablocal_2(:,irec,1),bufsize,bufdispl,rset%mpi%comm_bandfft,ierr)
562        call xmpi_allgatherv(ablocal_1(1:rset%par%npt,irec,2),bufsize(rset%mpi%me),&
563 &       ablocal_2(:,irec,2),bufsize,bufdispl,rset%mpi%comm_bandfft,ierr)
564      end do
565 
566 
567 !    --Transigrid: coarse->fine on differents procs (with respect
568 !    the number of recursion)
569      rhogf = zero;  rhogc = zero
570      call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,rholoc_2,rholocal_f)
571 
572      do irec = 0,2*(nrec+1)-1
573        ii1 = modulo(irec,rset%mpi%nproc)
574        jj1 = 1+modulo(irec,2)
575        kk1 = floor(irec/2.)
576        if(maxval(abs(ablocal_2(:,kk1,jj1))) > tol10 .and. rset%mpi%me == ii1) then
577          rhogf = zero; rhogc = zero
578          call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,&
579 &         rhogf,ablocal_2(:,kk1,jj1),ablocal_f(:,kk1,jj1))
580        end if
581      end do
582 
583 !    --Recuperation of all interpolated results
584 !    from procs to allprocs
585      call xmpi_sum(ablocal_f,rset%mpi%comm_bandfft,ierr)
586 
587 !    --Assignation the interpolated results on the fine grid
588 !    any procs to obtain the same point as in the standard recursion
589      do ii1 = 0, rset%mpi%nproc-1
590        jj1 = rset%par%displs(ii1)+1
591        if(ii1 == rset%mpi%me) then
592          alocal = transpose(ablocal_f(jj1:jj1+rset%par%vcount(ii1)-1,:,1))
593          b2local = transpose(ablocal_f(jj1:jj1+rset%par%vcount(ii1)-1,:,2))
594          rholocal = rholocal_f(jj1:jj1+rset%par%vcount(ii1)-1)
595        end if
596      end do
597    end if
598    ABI_DEALLOCATE(ablocal_f)
599    ABI_DEALLOCATE(ablocal_2)
600    ABI_DEALLOCATE(ablocal_1)
601    ABI_DEALLOCATE(rhogf)
602    ABI_DEALLOCATE(rholocal_f)
603    ABI_DEALLOCATE(rhogc)
604    ABI_DEALLOCATE(rholoc_2)
605 
606    call timab(604,2,tsec2) !--stop time-counter: transgrid
607  else
608    write(msg,'(a)')' - TRANSGRID NOT USED --'
609    call wrtout(std_out,msg,'COLL')
610  end if
611 !!--End transgrid
612 !!###############################################################
613 !###################################
614 !--Fermi energy computation
615 !--find the good mu by imposing the electrons number
616  call fermisolverec(rset%efermi,rholocal,alocal,b2local,rset%debug,&
617 & rset%min_nrec,tsmear,trotter,nelect,tol10,100, &
618 & rset%par%ntranche,rset%mpi,inf_ucvol,& !.False. .and.&
619 & (rset%tp==2 .or. rset%tp==3) .and. trotter>1)
620 
621 !#################################################################
622 !######### ENTROPY AND GRAN POTENTIAL COMPUTATION  ##################
623  entropy = zero
624  gran_pot = zero
625  noentropie : if(get_K_S_G==1)then
626    entropy1 = zero; entropy2 = zero ;entropy3 = zero; entropy4 = zero
627    gran_pot1 = zero ; gran_pot2 = zero; gran_pot3 = zero; gran_pot4 = zero
628 
629 !  --Seek for the min of the path integral
630    potmin = zero;  nlpotmin = zero
631    if(dtset%rectesteg/=1) potmin = minval(vtrial(:,1))
632    if(rset%nl%nlpsp)  nlpotmin = minval(rset%nl%eival(:,:,:))
633    xmax = exp(-ratio2*(potmin+nlpotmin-rset%efermi))
634 
635    dim_entro = 0;  if(rset%debug) dim_entro = 4;
636 
637    if(dtset%recgratio>1) then
638 !    --Recgratio>1
639      ABI_ALLOCATE(rhogf,(2,rset%pawfgr%nfft))
640      ABI_ALLOCATE(rhogc,(2,rset%pawfgr%nfftc))
641      ABI_ALLOCATE(entropy_v_f,(rset%pawfgr%nfft,0:4))
642      ABI_ALLOCATE(entropy_v_c,(rset%pawfgr%nfftc,0:4))
643      ABI_ALLOCATE(entropy_v_2,(1:rset%par%npt,0:4))
644      ABI_ALLOCATE(gran_pot_v_f,(rset%pawfgr%nfft,0:4))
645      ABI_ALLOCATE(gran_pot_v_c,(rset%pawfgr%nfftc,0:4))
646      ABI_ALLOCATE(gran_pot_v_2,(1:rset%par%npt,0:4))
647 
648      entropy_v_c = zero; entropy_v_f = zero; entropy_v_2 = zero
649      gran_pot_v_c = zero; gran_pot_v_f = zero; gran_pot_v_2 = zero
650 
651 
652      do ipoint = 1,rset%par%npt
653        call entropyrec(exp(rset%efermi*ratio2)*aloc_copy(:,ipoint), &
654 &       exp(rset%efermi*ratio1)*b2loc_copy(:,ipoint), &
655 &       nrec,trotter,entropy_v_2(ipoint,0),two,&
656 &       rset%debug,n_pt_integ_entropy,perc_vmin*xmax,&
657 &       entropy_v_2(ipoint,1),&
658 &       entropy_v_2(ipoint,2),&
659 &       entropy_v_2(ipoint,3),&
660 &       entropy_v_2(ipoint,4))
661 
662        call gran_potrec(exp(rset%efermi*ratio2)*aloc_copy(:,ipoint), &
663 &       exp(rset%efermi*ratio1)*b2loc_copy(:,ipoint), &
664 &       nrec,trotter,gran_pot_v_2(ipoint,0),two,&
665 &       rset%debug,n_pt_integ_entropy,perc_vmin*xmax,&
666 &       gran_pot_v_2(ipoint,1),&
667 &       gran_pot_v_2(ipoint,2),&
668 &       gran_pot_v_2(ipoint,3),&
669 &       gran_pot_v_2(ipoint,4))           
670      end do
671      ABI_DEALLOCATE(aloc_copy)
672      ABI_DEALLOCATE(b2loc_copy)
673 
674      call timab(613+swt_tm,1,tsec2)  !!--start time-counter: sync gpu-cpu
675      call xmpi_barrier(rset%mpi%comm_bandfft)
676      call timab(613+swt_tm,2,tsec2)  !!--stop time-counter: sync gpu-cpu
677 
678      call timab(604,1,tsec2) !--start time-counter: transgrid
679      if(rset%mpi%nproc==1) then
680        entropy_v_c = entropy_v_2
681        gran_pot_v_c = gran_pot_v_2
682      end if
683      do ii1 = 0,dim_entro
684        call xmpi_allgatherv(entropy_v_2(:,ii1),bufsize(rset%mpi%me),&
685 &       entropy_v_c(:,ii1),bufsize,bufdispl,&
686 &       rset%mpi%comm_bandfft,ierr)   
687        call xmpi_allgatherv(gran_pot_v_2(:,ii1),bufsize(rset%mpi%me),&
688 &       gran_pot_v_c(:,ii1),bufsize,bufdispl,&
689 &       rset%mpi%comm_bandfft,ierr)   
690 
691        if(maxval(abs(entropy_v_c(:,ii1))) > tol10) then
692          rhogf = zero; rhogc = zero
693          call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,&
694 &         rset%pawfgr,rhogc,rhogf,entropy_v_c(:,ii1),entropy_v_f(:,ii1))
695        end if
696        if(maxval(abs(gran_pot_v_c(:,ii1))) >tol10) then
697          rhogf = zero; rhogc = zero
698          call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,&
699 &         rset%pawfgr,rhogc,rhogf,gran_pot_v_c(:,ii1),gran_pot_v_f(:,ii1))
700        end if
701      end do
702      call timab(604,2,tsec2) !--stop time-counter: transgrid
703 
704      entropy  = sum(entropy_v_f(:,0))
705      gran_pot  = sum(gran_pot_v_f(:,0))
706 
707      if(rset%debug)then
708        entropy1 = sum(entropy_v_f(:,1))
709        entropy2 = sum(entropy_v_f(:,2))
710        entropy3 = sum(entropy_v_f(:,3))
711        entropy4 = sum(entropy_v_f(:,4))
712        gran_pot1 = sum(gran_pot_v_f(:,1))
713        gran_pot2 = sum(gran_pot_v_f(:,2))
714        gran_pot3 = sum(gran_pot_v_f(:,3))
715        gran_pot4 = sum(gran_pot_v_f(:,4))
716      end if
717 
718      ABI_DEALLOCATE(entropy_v_f)
719      ABI_DEALLOCATE(entropy_v_c)
720      ABI_DEALLOCATE(entropy_v_2)
721      ABI_DEALLOCATE(rhogf)
722      ABI_DEALLOCATE(rhogc)
723      ABI_DEALLOCATE(gran_pot_v_f)
724      ABI_DEALLOCATE(gran_pot_v_c)
725      ABI_DEALLOCATE(gran_pot_v_2)
726 
727 
728    else
729 !    --Recgratio=1
730      do ipoint = 1,rset%par%ntranche
731        call entropyrec(exp(rset%efermi*ratio2)*alocal(:,ipoint), &
732 &       exp(rset%efermi*ratio1)*b2local(:,ipoint), &
733 &       nrec,trotter,entropylocal,two,&
734 &       rset%debug,n_pt_integ_entropy,perc_vmin*xmax,&
735 &       entropylocal1,entropylocal2,&
736 &       entropylocal3,entropylocal4)
737        call gran_potrec(exp(rset%efermi*ratio2)*alocal(:,ipoint), &
738 &       exp(rset%efermi*ratio1)*b2local(:,ipoint), &
739 &       nrec,trotter,gran_pot_local,two,& !/ucvol,&
740 &       rset%debug,n_pt_integ_entropy,perc_vmin*xmax,&
741 &       gran_pot_local1,gran_pot_local2,&
742 &       gran_pot_local3,gran_pot_local4)
743 
744        entropy = entropy + entropylocal
745        gran_pot = gran_pot + gran_pot_local
746        if(rset%debug)then
747          entropy1 = entropy1 + entropylocal1
748          entropy2 = entropy2 + entropylocal2
749          entropy3 = entropy3 + entropylocal3
750          entropy4 = entropy4 + entropylocal4
751          gran_pot1 = gran_pot1 + gran_pot_local1
752          gran_pot2 = gran_pot2 + gran_pot_local2
753          gran_pot3 = gran_pot3 + gran_pot_local3
754          gran_pot4 = gran_pot4 + gran_pot_local4
755        end if
756 
757      end do
758 
759      call xmpi_sum(entropy,rset%mpi%comm_bandfft ,ierr)
760      call xmpi_sum(gran_pot,rset%mpi%comm_bandfft ,ierr)
761      if(rset%debug)then
762        call xmpi_sum(entropy1,rset%mpi%comm_bandfft ,ierr)
763        call xmpi_sum(entropy2,rset%mpi%comm_bandfft ,ierr)
764        call xmpi_sum(entropy3,rset%mpi%comm_bandfft ,ierr)
765        call xmpi_sum(entropy4,rset%mpi%comm_bandfft ,ierr)
766        call xmpi_sum(gran_pot1,rset%mpi%comm_bandfft ,ierr)
767        call xmpi_sum(gran_pot2,rset%mpi%comm_bandfft ,ierr)
768        call xmpi_sum(gran_pot3,rset%mpi%comm_bandfft ,ierr)
769        call xmpi_sum(gran_pot4,rset%mpi%comm_bandfft ,ierr)
770      end if
771    end if
772 
773    if(rset%debug)then
774      write(msg,'(2(2a,4(2a,es11.4,a)))')&
775 &     ' --------------------------'        ,ch10, &
776 &     '  entropy, horiz path=',' ',entropy1,ch10, &
777 &     '  entropy, xmax  path=',' ',entropy2,ch10, &
778 &     '  entropy, xmin  path=',' ',entropy3,ch10, &
779 &     '  entropy, zero  path=',' ',entropy4,ch10, &
780 &     ' --------------------------'        ,ch10, &
781 &     ' -omega/T, horiz path=',' ',gran_pot1,ch10, &
782 &     ' -omega/T, xmax  path=',' ',gran_pot2,ch10, &
783 &     ' -omega/T, xmin  path=',' ',gran_pot3,ch10, &
784 &     ' -omega/T, zero  path=',' ',gran_pot4,ch10
785      call wrtout(std_out,msg,'COLL')
786    end if
787 
788    e_eigenvalues=tsmear*(entropy-gran_pot) + rset%efermi*nelect
789 !  --In reality gran_pot is not the gran potential but the
790 !  potential omega=-PV (Landau-potential or grand-potential)
791 !  divided by -T so the internal energy
792 !  U:=e_eigenvalues= TS+omega+muN = ST-T*sum(ln(1-n))+muN =
793 !  T(S-gran_pot)+muN
794 
795 
796    if(rset%nl%nlpsp) then
797      call nlenergyrec(rset,enl,exppot,dtset%ngfft,dtset%natom,&
798 &     dtset%typat,tsmear,trotter,tolrec)
799    end if
800  end if noentropie
801 !##### END ENTROPY AND GRAN POTENTIAL COMPUTATION  ##################
802 !#################################################################
803 
804 !if(associated(projec))
805  if(rset%nl%nlpsp)  then
806    ABI_DEALLOCATE(projec)
807  end if
808  if(associated(gcart_loc))  then
809    ABI_DEALLOCATE(gcart_loc)
810  end if
811  if((dtset%recgratio/=1 .or. rset%load==1))  then
812    ABI_DEALLOCATE(bufdispl)
813    ABI_DEALLOCATE(bufsize)
814  end if
815 !------------------------------------------------------------------
816 !--Check if the convergence is reached for rho
817  drho = maxval(abs(rhor(min_pt:max_pt,1)-rholocal(:)))
818  drhomax = drho
819  call xmpi_max(drho,drhomax,rset%mpi%comm_bandfft,ierr)
820 
821 !write(std_out,*)'drhomax,toldrho',drhomax,toldrho
822  if(drhomax<toldrho)then
823    rset%quitrec = rset%quitrec+1
824  else
825    rset%quitrec = 0
826  end if
827 
828 !-------------------------------------------------------------------
829 !--Density on all procs
830  rhor(min_pt:max_pt,1) = rholocal(:)
831  if(rset%mpi%nproc /= 1)then
832    call xmpi_allgatherv(rholocal,rset%par%ntranche,rhor(:,1),&
833 &   rset%par%vcount,rset%par%displs,&
834 &   rset%mpi%comm_band,ierr)
835  end if
836 
837 !--------------------------------------------------------------------
838 !--2nd EKIN CALCULATION: this method is used
839  noekin2 : if(get_K_S_G==1)then
840    intrhov = (inf_ucvol)*sum(rholocal*vtrial(min_pt:max_pt,1))
841    call xmpi_sum(intrhov,rset%mpi%comm_bandfft ,ierr)
842 
843    ek = e_eigenvalues-intrhov-enl
844 
845 
846    if(rset%debug) then
847      write (msg,'(2a,3f15.10,2a,3f15.10,2a,f15.10,a)') ch10,&
848 &     ' ek,int(rho*V),ek+int(rho*V) ', ek, intrhov,  ek+ intrhov,ch10, &
849 &     ' kT*S, kT*sum(ln(...)), diff ', tsmear*entropy, tsmear*gran_pot, tsmear*(entropy-gran_pot),ch10, &
850 &     ' kT(S-sum(ln(...)))+mu*nelect', tsmear*(entropy-gran_pot)+rset%efermi*nelect,ch10
851      call wrtout(std_out,msg,'COLL')
852    end if
853 
854 
855  end if noekin2
856 !--------------------------------------------------------------------
857  fermie = rset%efermi
858 
859 !--------------------------------------------------------
860 !!--At the first step to find the max number of recursion
861 !!  needed to convergence, then redefine nrec.
862  if(initialized==0 .and. dtset%ntime>0) then
863    call  Calcnrec(rset,b2local)
864  end if
865 
866 !--------------------------------------------------------
867  call destroy_distribfft(rset%mpi%distribfft)
868  call init_distribfft(rset%mpi%distribfft,'c',rset%mpi%nproc_fft,rset%ngfftrec(2),rset%ngfftrec(3))
869  call init_distribfft(rset%mpi%distribfft,'f',rset%mpi%nproc_fft,dtset%ngfft(2),dtset%ngfft(3))
870 
871 !--Printing results
872  write(msg,'(3a,f15.10)')&
873 & ' -- Results: --------------------------------------',ch10,&
874 & ' mu          =',rset%efermi
875  call wrtout(std_out,msg,'COLL')
876  if(get_K_S_G==1)then
877    write(msg,'(a,f15.10,6(2a,f15.10))')&
878 &   ' potmin      =',potmin,ch10,&
879 &   ' <V_eff>     =',intrhov,ch10,&
880 &   ' entropy     =',entropy,ch10,&
881 &   ' -omega/T    =',gran_pot,ch10,&
882 &   ' eigenvalues =',e_eigenvalues,ch10,&
883 &   ' kinetic     =',ek,ch10,&
884 &   ' non-loc ene =',enl
885    call wrtout(std_out,msg,'COLL')
886  end if
887  write(msg,'(a,50a)')' ',('-',ii=1,50)
888  call wrtout(std_out,msg,'COLL')
889 !write(std_out,*)'is the pressure ',gran_pot*tsmear/(rset%inf%ucvol*real(nfftrec,dp))
890 
891 !--Structured debugging : if rset%debug=T, stop here.
892  if(.false.)then !(rset%debug)
893    call wrtout(std_out,'  rhor ','PERS')
894    write(std_out,*)rhor(:,1)
895    call wrtout(std_out,' ','COLL')
896    write(msg,'(a,2d10.3)')'  temps recursion    ',tsec
897    call wrtout(std_out,msg,'COLL')
898    write(msg,'(a,l1,a)') ' vtorhorec : rset%debug=-',rset%debug,', debugging mode => stop '
899    MSG_ERROR(msg)
900  end if
901 
902  call timab(600,2,tsec2)
903  call timab(21,2,tsec)
904 
905  call symrhg(1,gprimd,irrzon,rset%mpi,nfftf,&
906 & dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3),dtset%ngfft,dtset%nspden,&
907 & dtset%nsppol,dtset%nsym,dtset%paral_kgb,&
908 & phnons,rhog,rhor,rprimd,dtset%symafm,dtset%symrel)
909 
910 end subroutine vtorhorec