TABLE OF CONTENTS


ABINIT/memorf [ Functions ]

[ Top ] [ Functions ]

NAME

 memorf

FUNCTION

 Estimation of the memory needed for a response-function job.
 According to the value of the option variable,
 might also try to allocate this amount of memory, and if it fails,
 might estimate the available memory.

COPYRIGHT

 Copyright (C) 1998-2017 ABINIT group (XG)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

  cplex=1 or 2, indicate whether the den and pot functions are real or complex
  getcell=if non-zero, the values of acell and rprim are taken from
   the output of another dataset
  idtset=number of the current dataset
  intxc=control xc quadrature
  iout=unit number for output of formatted data.
  iprcel=govern the choice of preconditioner for the SCF cycle
  iscf=governs the choice of SCF algorithm, or non-SCF calculation.
  jdtset=index of the current dataset
  lmnmax=max. number of (l,m,n) components over all type of psps
  lnmax =max. number of (l,n)   components over all type of psps
  mband =maximum number of bands
  mffmem =governs the number of FFT arrays which are fit in core memory
  mgfft =maximum single fft dimension
  mkmems=number of k points which can fit in memory; set to 0 if use disk
    the three values correspond to mkmem, mkqmem and mk1mem
  mpi_enreg=information about MPI parallelization
  mpssang is 1+maximum angular momentum for nonlocal pseudopotential
  mpssoang is 1+maximum (spin*angular momentum) for nonlocal pseudopotential
  mpw   =maximum number of planewaves in basis sphere (large number)
  mqgrid=maximum dimension of grid of q values for psp representations
  natom =number of atoms in unit cell
  nband(nkpt*nsppol)=number of bands at each k point, for each polarization
  nfft  =(effective) number of FFT grid points (for one processor)
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/input_variables/vargs.htm#ngfft
  nkpt  =number of k points
  nloalg(3)=governs the choice of the algorithm for non-local operator.
  nspden=number of spin-density components
  nspinor=number of spinorial components of the wavefunctions
  nsppol=number of channels for spin-polarization (1 or 2)
  nsym  =number of symmetry elements in space group
  ntypat=number of types of atoms
  n1xccc=dimension of xccc1d ; 0 if no XC core correction is used
  occopt=option for occupation numbers. If 3<=occopt<=7, varying occupation
  optddk=1 if ddk is computed during run
  optphon=1 if phonons are computed during run
  option : if 0 , no test of available memory
           if 1 , the routine tries to allocate the estimated memory, for testing
                    purposes, and if a failure occurs, the routine stops.
           if 2 , like 1, but before stopping, the routine will provide
                    an estimation of the available memory.
  optstrs=1 if strain perturbation is computing during run
  prtvol=control print volume
  useylm=governs the way the nonlocal operator is to be applied:
         1=using Ylm, 0=using Legendre polynomials
  use_gpu_cuda=1 if Cuda (GPU) is on
  xclevel= level of the XC functional

OUTPUT

  (only writing)

NOTES

 for the estimation, it is only taken into account those
 arrays that have some probability of being larger than 1000*8 bytes :
 - All the arrays that have large numbers as one of their dimensions
 (mqgrid, mpw, nfft, ngfft(4)*ngfft(5)*ngfft(6),n1xccc
                                      or a constant larger than 1000)
 - All the arrays that have a product of two moderately large numbers
 (potential size above 30  : mband, mgfft, mkmems, natom, nkpt, nsym,
  or a constant larger than 30)
 After this estimation, an amount of (176 + 55 + 6*natom) Kbytes is added
 to take into account the static arrays declared
 in rhohxc and daughter routines (at maximum 22*1000 dp numbers),
 as well as other arrays like
 character(len=500) :: message (present in about 100 routines), or the different
 arrays allocated in move.f, brdmin.f, gstate.f (xf array) or pspini.f
 In the case 3<=occopt<=7 this amount is increased by 760 Kbytes
 to take into account the arrays smdfun, occfun, entfun, workfun and xgrid,
 declared in getnel

 The current version takes into account only :
 1) and 2) the "main chain" in its two slightly different versions :
 driver - respfn - dfpt_looppert - dfpt_scfcv - dfpt_vtorho - dfpt_vtowfk -
     dfpt_cgwf - getghc - fourwf or (nonlop+opernl)

 Also, it is assumed that the potentials are non-local, even if there
     are local ! It would be necessary to update this routine
     now that the beginning of psp files is read before
     the present call (XG 980502)

 Some BIG approximations, not present in the GS corresponding routine
  have been done : nsym=nsym1, nkpt=nkpt_rbz, mpw=mpw1 ...

PARENTS

      memory_eval

CHILDREN

      memana,wrtout

SOURCE

111 #if defined HAVE_CONFIG_H
112 #include "config.h"
113 #endif
114 
115 #include "abi_common.h"
116 
117 subroutine memorf(cplex,n1xccc,getcell,idtset,intxc,iout,iprcel,&
118 & iscf,jdtset,lmnmax,lnmax,mband,mffmem,mgfft,&
119 & mkmems,mpi_enreg,mpsang,mpssoang,mpw,mqgrid,&
120 & natom,nband,nfft,ngfft,&
121 & nkpt,nloalg,nspden,nspinor,nsppol,nsym,ntypat,&
122 & occopt,optddk,optphon,option,optstrs,prtvol,useylm,use_gpu_cuda,xclevel)
123 
124  use defs_basis
125  use defs_abitypes
126  use m_errors
127  use m_profiling_abi
128 
129 !This section has been created automatically by the script Abilint (TD).
130 !Do not modify the following lines by hand.
131 #undef ABI_FUNC
132 #define ABI_FUNC 'memorf'
133  use interfaces_14_hidewrite
134  use interfaces_57_iovars, except_this_one => memorf
135 !End of the abilint section
136 
137  implicit none
138 
139 !Arguments ------------------------------------
140 !scalars
141  integer,intent(in) :: cplex,getcell,idtset,intxc,iout,iprcel,iscf
142  integer,intent(in) :: jdtset,lmnmax,lnmax,mband,mffmem,mgfft,mpsang
143  integer,intent(in) :: mpssoang,mpw,mqgrid,n1xccc,natom,nfft,nkpt
144  integer,intent(in) :: nspden,nspinor,nsppol,nsym,ntypat,occopt
145  integer,intent(in) :: optddk,option,optphon,optstrs,prtvol,useylm
146  integer,intent(in) :: use_gpu_cuda,xclevel
147  type(MPI_type),intent(in) :: mpi_enreg
148 !arrays
149  integer,intent(in) :: mkmems(3),nband(nkpt*nsppol),ngfft(18)
150  integer,intent(in) :: nloalg(3)
151 
152 !Local variables-------------------------------
153 !marrays= maximal number of arrays to be monitored (or group of arrays)
154 !cmpw(marrays)=count of blocks of size mpw bytes
155 !cfft(marrays)=number of blocks of size nfft bytes
156 !cadd(marrays)=additional storage needed (in bytes)
157 !dttyp(marrays)=datatype of the array : 4 for integers, 8 for real(dp)
158 !nchain= number of different chains of routines
159 !chain(marrays,nchain)=different chains of routines
160 !scalars
161  integer,parameter :: marrays=150,nchain=2
162  integer :: fftalgb,matblk,maxmkmem,mincat,mk1mem,mkmem,mkqmem,mu,n_fftgr
163  integer :: narr_fourdp,ngrad,nprocwf
164  integer :: my_natom
165  real(dp) :: mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm
166  character(len=500) :: message
167  character(len=1) :: firstchar
168 !arrays
169  integer :: dttyp(marrays)
170  real(dp) :: cadd(marrays),cfft(marrays),cmpw(marrays)
171  real(dp),allocatable :: cfft_dum(:)
172  logical :: chain(marrays,nchain)
173 
174 ! **************************************************************************
175 
176  if(option<0 .or. option>2)then
177    write(message, '(a,i0,a)')'option= ',option,' while the only allowed values are 0, 1, or 2.'
178    MSG_BUG(message)
179  end if
180 
181  firstchar=' ';if (use_gpu_cuda==1) firstchar='_'
182  cmpw(:)=zero ; cfft(:)=zero ; cadd(:)=zero 
183  dttyp(:)=0
184 
185  call wrtout(std_out,' memorf : analysis of memory needs ','COLL')
186 
187  if(jdtset>=100)then
188    write(message,'(80a,a,a,i5,a)')('=',mu=1,80),ch10,&
189 &   ' Values of the parameters that define the memory need for DATASET',jdtset,&
190 &   ' (RF).'
191  else if(jdtset/=0)then
192    write(message,'(80a,a,a,i3,a)')('=',mu=1,80),ch10,&
193 &   ' Values of the parameters that define the memory need for DATASET',jdtset,&
194 &   ' (RF).'
195  else
196    write(message,'(80a,a,a,a)')('=',mu=1,80),ch10,&
197 &   ' Values of the parameters that define the memory need of the present run',&
198 &   ' (RF).'
199  end if
200  call wrtout(iout,message,'COLL')
201  call wrtout(std_out,message,'COLL')
202 
203  mkmem=mkmems(1)
204  mkqmem=mkmems(2)
205  mk1mem=mkmems(3)
206  my_natom=natom;if (mpi_enreg%nproc_atom>1) my_natom=mpi_enreg%my_natom
207 
208  write(message,'( 4(a,i8),a,4(a,i8) )' ) &
209 & '     intxc =',intxc   ,'      iscf =',iscf,&
210 & '    lmnmax =',lmnmax  ,'     lnmax =',lnmax,ch10,&
211 & '     mgfft =',mgfft,'  mpssoang =',mpssoang,&
212 & '    mqgrid =',mqgrid,'     natom =',natom
213  call wrtout(iout,message,'COLL')
214  call wrtout(std_out,message,'COLL')
215 
216  write(message,'( 4(a,i8),a,4(a,i8),a,4(a,i8) )' ) &
217 & '  nloc_mem =',nloalg(2)*(nloalg(3)+1),'    nspden =',nspden ,&
218 & '   nspinor =',nspinor,'    nsppol =',nsppol ,ch10,&
219 & '      nsym =',nsym,'    n1xccc =',n1xccc ,&
220 & '    ntypat =',ntypat,'    occopt =',occopt ,ch10,&
221 & '   xclevel =',xclevel
222  call wrtout(iout,message,'COLL')
223  call wrtout(std_out,message,'COLL')
224 
225  write(message,'(4(3(a,i12),a))') &
226 & '-    mband =',mband  ,'        mffmem =',mffmem,&
227 & '         mkmem =',mkmem  ,ch10,&
228 & '-   mkqmem =',mkqmem ,'        mk1mem =',mk1mem,&
229 & '           mpw =',mpw  ,ch10,&
230 & '      nfft =',nfft ,'          nkpt =',nkpt
231  call wrtout(iout,message,'COLL')
232  call wrtout(std_out,message,'COLL')
233 
234  if (my_natom/=natom)then
235    write(message,'(a,i10)') 'Pmy_natom=',my_natom
236    call wrtout(iout,message,'COLL')
237    call wrtout(std_out,message,'COLL')
238  end if
239 
240  write(message,'(80a)') ('=',mu=1,80)
241  call wrtout(iout,message,'COLL')
242  call wrtout(std_out,message,'COLL')
243 
244  if(getcell>0 .or. (getcell<0 .and. idtset+getcell>0) )then
245    write(message,'(a,a,a,a,a,a,i3,a,i3,a,a,a,a,a,a)' )ch10,&
246 &   ' memorf : COMMENT -',ch10,&
247 &   '  The determination of memory needs at this stage is meaningless,',ch10,&
248 &   '  since getcell = ',getcell,' is non-zero, while idtset=',idtset,'.',ch10,&
249 &   '  The following numbers are obtained by supposing that acell and rprim',ch10,&
250 &   '  are NOT taken from a previous dataset. You cannot rely on them.',ch10
251    call wrtout(iout,message,'COLL')
252    call wrtout(std_out,message,'COLL')
253  end if
254 
255  n_fftgr=1
256  if(iscf==1)            n_fftgr=5
257  if(iscf==2.or.iscf==3) n_fftgr=4
258  if(iscf==5.or.iscf==6) n_fftgr=10
259 
260 !work1 and work2 in fourdp : take into account approximately fftalgb
261  fftalgb=mod(ngfft(7),100)/10
262  if(fftalgb==0)narr_fourdp=2*2
263  if(fftalgb==1)narr_fourdp=2
264 
265  ngrad=1
266  if(xclevel==2)ngrad=2
267 
268 !(0)                     in main, driver, and respfn -------------------
269 !indsym (respfn)
270  cadd(1)=4*nsym*natom          ; dttyp(1)=4
271 !rhor,rhog (respfn)
272  cfft(2)=nspden+2              ; dttyp(2)=8
273 !occ (driver), doccde (respfn)
274  cadd(3)=2*mband*nkpt*nsppol   ; dttyp(3)=8
275 !qgrid,vlspl,ffspl (driver)
276  cadd(4)=mqgrid*(1+2*ntypat*(1+lnmax))   &
277 & ; dttyp(4)=8
278 !xccc1d (driver)
279  cadd(5)=n1xccc*6*ntypat       ; dttyp(5)=8
280 !vtrial (respfn)
281  cfft(6)=nspden                ; dttyp(6)=8
282 !kxc (respfn)
283  cfft(7)=2*nspden-1            ; dttyp(7)=8
284 
285 !(1-2)                   in dfpt_looppert --------------------------------------
286 !ph1d
287  cadd(11)=2*3*(2*mgfft+1)*natom ; dttyp(11)=8
288 !vpsp1
289  cfft(12)=cplex                ; dttyp(12)=8
290 !indsy1  assume that nsym=nsym1
291  cadd(13)=4*nsym*natom         ; dttyp(13)=4
292 !irrzonr1 and phnons1  assume that nsym=nsym1
293  if(nsym/=1)then
294    cfft(14)=(2+(nspden/4))*((nspden/nsppol)-3*nspden/3)     ; dttyp(14)=4
295    cfft(15)=2*((nspden/nsppol)-3*nspden/3)                  ; dttyp(15)=8
296  end if
297 !doccde_rbz, eigen0, eigenq, occ_rbz, docckqde, occkq, resid
298 !assume than nkpt=nkpt_rbz
299  cadd(16)=7*mband*nkpt*nsppol  ; dttyp(16)=8
300 !kg
301  cmpw(18)=3*mkmem              ; dttyp(18)=4
302 !cg
303  cmpw(19)=2*nspinor*mband*mkmem*nsppol  ; dttyp(19)=8
304 !kg1
305  cmpw(21)=3*mk1mem             ; dttyp(21)=4
306 !cgq
307  cmpw(22)=2*nspinor*mband*mkqmem*nsppol  ; dttyp(22)=8
308 !cg1
309  cmpw(23)=2*nspinor*mband*mk1mem*nsppol  ; dttyp(23)=8
310 !rhor1,rhog1
311  cfft(24)=cplex*nspden+2       ; dttyp(24)=8
312 !eigen1
313 !assume than nkpt=nkpt_rbz
314  cadd(25)=2*mband*mband*nkpt*nsppol      ; dttyp(25)=8
315 !ylm
316  cmpw(26)=mkmem*mpsang*mpsang*useylm     ; dttyp(26)=8
317 
318 !(3)                     in dfpt_scfcv --------------------------------------
319 
320 !vhartr1,vtrial1,vxc
321  cfft(31)=cplex+cplex*nspden+nspden      ; dttyp(31)=8
322  if(iscf>0)then
323 !  f_fftgr
324    cfft(32)=cplex*nspden*n_fftgr*mffmem    ; dttyp(32)=8
325  end if
326 
327 !(4)                   in dfpt_vtorho----------------------------------------
328 
329 !proc_distrb
330  cadd(41)=nkpt*mband*nsppol    ; dttyp(41)=4
331 !kg_k,kg1_k
332  cmpw(42)=6                    ; dttyp(42)=4
333 !rhoaug1, vlocal, vlocal1
334  cfft(43)=2*cplex+1            ; dttyp(43)=8
335  cadd(43)=(2*cplex+1)*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
336 
337  if(mkqmem==0)then
338 !  cgq_disk
339    cmpw(45)=2*nspinor*mband      ; dttyp(45)=8
340  end if
341 !doccde_k,doccde_kq,eig0_k, ..., eig1_k, rocceig
342  cadd(47)=(14+3*mband)*mband   ; dttyp(47)=8
343 !ylm_k,ylm1_k
344  cmpw(49)=2*mpsang*mpsang*useylm  ; dttyp(49)=8
345 
346 !(5)                     in dfpt_vtowfk --------------------------------------
347 
348 !dkinpw,kinpw1
349  cmpw(51)=2                    ; dttyp(51)=8
350 !ffnlk,ffnl1,ffnlkq
351  cmpw(52)=2*(ntypat+2)*lmnmax  ; dttyp(52)=8
352 !ghc,gvnlc,gvnl1
353  cmpw(53)=6*nspinor            ; dttyp(53)=8
354 !ph3d
355  matblk=NLO_MINCAT
356  if(nloalg(2)<=0)matblk=natom
357  cmpw(54)=2*matblk             ; dttyp(54)=8
358 !wfraug,wfraug1,rhoaug
359  cfft(55)=5                    ; dttyp(55)=8
360  cadd(55)=5*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
361 !cwavef,cwave0,cwave1
362  cmpw(56)=6*nspinor            ; dttyp(56)=8
363 
364 !(6)                     in dfpt_cgwf ----------------------------------------
365 
366 !gh1, gh_direc, gvnl_direc, conjgr, direc, vresid, cwaveq
367  cmpw(61)=14*nspinor            ; dttyp(61)=8
368 
369 !(9a)                    in getghc and fourwf----------------------------
370 
371 !work (in getghc)
372  cfft(91)=2                    ; dttyp(91)=8
373  cadd(92)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
374 !work1 (in fourwf)
375  cfft(92)=2                    ; dttyp(92)=8
376  cadd(92)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
377 
378 !(9b)                    in getghc, nonlop and opernl--------------------
379  mincat=min(NLO_MINCAT,natom-ntypat+1)
380  if (useylm==0) then                          ! ===== nonlop_pl
381 !  gxa  (in nonlop)
382    cadd(94)=2*20*mincat*2       ; dttyp(94)=8
383 !  dgxdt  (in nonlop)
384    cadd(95)=2*3*20*mincat*2    ; dttyp(95)=8
385 !  dgxds  (in nonlop)
386    cadd(96)=2*56*mincat*2      ; dttyp(96)=8
387 !  teffv (in opernl4 - no distinction is made for opernl, opernl2 or opernl3)
388 !  kpgx, ffkg
389 !  here, evaluate an upper value, with nproj=2, p,d and f orbitals, but not
390 !  considering the stress, since it will be called outside of the main chain
391    cadd(97)=NLO_MBLKPW*40        ; dttyp(97)=8
392 !  kpg if nloalg(3)=1
393    cadd(98)=3*mpw*nloalg(3)     ; dttyp(98)=8
394  else                                        ! ===== nonlop_ylm
395 !  gx + gxfac
396    cadd(94)=2* 2*mpw*lmnmax*mincat    ; dttyp(94)=8
397 !  dgxdt + dgxdtfac + d2gxdt
398    if (optddk>0.and.optphon==0.and.optstrs==0) cadd(95)=2*2*mpw*lmnmax*mincat
399    if (optphon>0) cadd(95)=12*2*mpw*lmnmax*mincat
400    if (optstrs>0) cadd(95)=72*2*mpw*lmnmax*mincat
401    dttyp(95)=8
402 !  kpg
403    cadd(96)=2*3*mpw       ; dttyp(96)=8
404    if (optphon>0) cadd(96)=cadd(96)+2*6*mpw
405 !  miscelaneous: indlmn_typ, ffnl_typ
406    cadd(97)=lmnmax*(6+mpw*(2+optstrs)); dttyp(97)=8
407 !  opernla_ylm: scalar,scali,scalarr,scalari
408    cadd(98)=2*mpw+2*mpw
409    if (optddk>0.and.optstrs==0) cadd(98)=cadd(98)+2*mpw
410    if (optstrs>0) cadd(98)=cadd(98)+9*2*mpw
411    dttyp(98)=8
412  end if
413 
414 !--------------------------------------------------------------------------
415 
416  chain(:,:)=.true.
417 
418 !Define the main chain version a (fourwf)
419  chain(93:100,1)=.false.
420 
421 !Define the main chain version b (nonlop+opernl)
422  chain(91:92,2)=.false.
423 
424 !The memory needed for each chain has been computed
425 !-------------------------------------------------------------------------
426 !Still need some auxiliary data : estimate the disk space
427 !or the maximum segment size.
428 
429 !XG030513 : MPIWF need to multiply mbdiskwf by the number of processors
430 !in the WF group. For the time being, nprocwf=1
431  nprocwf=mpi_enreg%nproc_fft
432 
433  mbdiskwf=(8*2*mpw*nprocwf*sum(nband(1:nkpt*nsppol)))/1024._dp**2 + 0.002_dp
434  mbdiskpd=(8*nfft*nsppol)/1024._dp**2 + 0.002_dp
435 
436 !Determine the largest array out of cg,cg1,cgq, cg_disk or f_fftgr (f_fftgr_disk)
437  if(mkmem==0 .and. mk1mem==0 .and. mkqmem==0)then
438    mbcg=(8*2*mpw*nspinor*mband)/1024._dp**2 + 0.002_dp
439  else
440    maxmkmem=maxval(mkmems(:))
441    mbcg=(8*2*mpw*nspinor*mband*maxmkmem*nsppol)/1024._dp**2 + 0.002_dp
442  end if
443  if(mffmem==0)then
444    mbf_fftgr=(8*cplex*nfft*n_fftgr)/1024._dp**2 + 0.002_dp
445  else
446    mbf_fftgr=(8*cplex*nfft*n_fftgr*nspden*mffmem)/1024._dp**2 + 0.002_dp
447  end if
448 
449 !---------------------------------------------------------------------
450 !Now, analyze the data
451 
452 !DEBUG
453 !write(std_out,*)' memorf : nchain=',nchain
454 !ENDDEBUG
455 
456  ABI_ALLOCATE(cfft_dum,(marrays))
457  cfft_dum=zero
458  mbgylm=zero
459  call memana(cadd,cfft,cfft_dum,chain,cmpw,dttyp,iout,iprcel,iscf,&
460 & marrays,mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm,mffmem,&
461 & mpw,natom,nchain,nfft,nfft,occopt,option,prtvol)
462  ABI_DEALLOCATE(cfft_dum)
463 
464 end subroutine memorf