TABLE OF CONTENTS


ABINIT/m_memeval [ Modules ]

[ Top ] [ Modules ]

NAME

 m_memeval

FUNCTION

  Functions to estimate memory requirements from the calculation parameters.

COPYRIGHT

  Copyright (C) 2008-2018 ABINIT group (XG, DC, DW)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

PARENTS

CHILDREN

SOURCE

21 #if defined HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include "abi_common.h"
26 
27 MODULE m_memeval
28 
29  use defs_basis
30  use defs_datatypes
31  use defs_abitypes
32  use m_abicore
33  use m_xmpi
34  use m_errors
35 
36  use m_geometry,      only : mkradim, mkrdim, xred2xcart, metric
37  use m_symtk,         only : mati3inv, littlegroup_q
38  use m_spgdata,       only : prtspgroup
39  use m_fftcore,       only : getng
40  use m_kg,            only : getmpw
41  use m_libpaw_tools,  only : libpaw_write_comm_set
42 
43  implicit none
44 
45  private

m_memeval/getdim_nloc [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

 getdim_nloc

FUNCTION

 Determine the dimensions of arrays that contain
 the definition of non-local projectors : ekb, ffspl, indlmn

INPUTS

  mixalch(npspalch,ntypalch,nimage)=alchemical mixing coefficients
  nimage=number of images
  npsp=number of pseudopotentials
  npspalch=number of pseudopotentials for alchemical purposes
  ntypat=number of types of pseudo atoms
  ntypalch=number of types of alchemical pseudo atoms
  pspheads(npsp)=<type pspheader_type>all the important information from the
   pseudopotential file headers, as well as the psp file names

OUTPUT

  lmnmax=maximum number of l,m,n projectors, not taking into account the spin-orbit
  lmnmaxso=maximum number of l,m,n projectors, taking into account the spin-orbit
  lnmax=maximum number of l,n projectors, not taking into account the spin-orbit
  lnmaxso=maximum number of l,n projectors, taking into account the spin-orbit

PARENTS

      m_psps,memory_eval

CHILDREN

      wrtout

SOURCE

subroutine getdim_nloc(lmnmax,lmnmaxso,lnmax,lnmaxso,mixalch,nimage,npsp,npspalch,&
& ntypat,ntypalch,pspheads)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'getdim_nloc'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nimage,npsp,npspalch,ntypalch,ntypat
 integer,intent(out) :: lmnmax,lmnmaxso,lnmax,lnmaxso
!arrays
 real(dp),intent(in) :: mixalch(npspalch,ntypalch,nimage)
 type(pspheader_type),intent(in) :: pspheads(npsp)

!Local variables-------------------------------
!scalars
 integer :: ityp,jalch,jpsp,ll,npure
 integer :: lmn_cnt,lmn_top,lmnso_cnt,ln_cnt,lnso_cnt
 character(len=500) :: message
!arrays
 integer :: nproj_acc(0:3),nprojso_acc(3)

! *************************************************************************

!Purpose: return the maximal number of (l,n) and (l,m,n) non-local projectors
!over all atom types, without (lnmax,lmnmax) and with (lnmaxso,lmnmaxso)
!the spin-orbit projectors. The first ntypat-ntypalch types are pure and use
!their own pseudopotential header; the remaining ones are alchemical and
!accumulate the projectors of every pseudopotential that is mixed into them.

!The four bounds are at least equal to 1, even for purely local psps
 lmnmax=1 ; lmnmaxso=1 ; lnmax=1 ; lnmaxso=1

!lmn_top tracks the true (unclamped) maximum of the (l,m,n) count; a plain
!running maximum is used because maxval was reported as not totally portable
!(MT 110215)
 lmn_top=-huge(0)

 npure=ntypat-ntypalch

 do ityp=1,ntypat

!  Number of projectors for each angular momentum, for this type of atom
   if(ityp<=npure)then
!    Pure pseudo atom : direct copy from the header
     nproj_acc(0:3)=pspheads(ityp)%nproj(0:3)
     nprojso_acc(:)=pspheads(ityp)%nprojso(:)
   else
!    Alchemical pseudo atom : if there is some mixing (over any image)
!    with a pseudopotential, its projectors must be accumulated
     nproj_acc(:)=0 ; nprojso_acc(:)=0
     do jpsp=npure+1,npsp
       jalch=jpsp-npure
       if(sum(abs(mixalch(jalch,ityp-npure,:)))>tol10)then
         nproj_acc(0:3)=nproj_acc(0:3)+pspheads(jpsp)%nproj(0:3)
         nprojso_acc(:)=nprojso_acc(:)+pspheads(jpsp)%nprojso(:)
       end if
     end do
   end if

!  Scalar-relativistic part : l=0..3, each (l,n) channel has 2l+1 values of m
   ln_cnt=sum(nproj_acc(0:3))
   lmn_cnt=0
   do ll=0,3
     lmn_cnt=lmn_cnt+nproj_acc(ll)*(2*ll+1)
   end do

!  Spin-orbit part : add the l=1..3 spin-orbit projectors
   lnso_cnt=ln_cnt+sum(nprojso_acc(1:3))
   lmnso_cnt=lmn_cnt
   do ll=1,3
     lmnso_cnt=lmnso_cnt+nprojso_acc(ll)*(2*ll+1)
   end do

!  Update the maximal bounds
   lmnmax  =max(lmnmax  ,lmn_cnt)
   lmnmaxso=max(lmnmaxso,lmnso_cnt)
   lnmax   =max(lnmax   ,ln_cnt)
   lnmaxso =max(lnmaxso ,lnso_cnt)
   lmn_top =max(lmn_top ,lmn_cnt)

 end do

!Tell the user when the value 1 only comes from the lower bound
 if(lmn_top==0)then
   write(message, '(3a)' )&
&   'Despite there is only a local part to pseudopotential(s),',ch10,&
&   'lmnmax and lnmax are set to 1.'
   MSG_COMMENT(message)
 end if

!XG040806 : a former adjustment that forced lnmax/lmnmax (and their spin-orbit
!counterparts) to be at least 1+lmax has been disabled, because it changed
!the results of many test cases that were working properly.

 write(message, '(a,a,i4,a,i4,3a,i4,a,i4,a)' ) ch10,&
& ' getdim_nloc : deduce lmnmax  =',lmnmax,', lnmax  =',lnmax,',',ch10,&
& '                      lmnmaxso=',lmnmaxso,', lnmaxso=',lnmaxso,'.'
 call wrtout(std_out,message,'COLL')

end subroutine getdim_nloc

m_memeval/memana [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

 memana

FUNCTION

 Analysis of the memory and disk space needed for the job,
 thanks to the data computed in the calling routine: for each
 array, the number of blocks of size mpw or nfft bytes, and the
 additional memory occupation;
 the list of arrays that are used for each chain.

 According to the value of the option variable,
 the routine may also try to allocate this amount of memory,
 and, if that fails, estimate the amount of memory that is
 actually available.

INPUTS

  cadd(marrays)=for each array, count of numbers (integer or real(dp), see dttyp)
    needed in addition to the cmpw, cfft and cfftf blocks.
  cfft(marrays) =for each array, count of blocks of size nfft bytes (coarse grid, if PAW)
  cfftf(marrays)=for each array, count of blocks of size nfft bytes (fine grid, if PAW)
  chain(marrays,nchain)=logical variable, that informs whether an array
    belongs to a given chain.
  cmpw(marrays)=for each array, count of blocks of size mpw bytes.
  dttyp(marrays)=datatype of the array : 4 for integers, 8 for real(dp)
  iout=unit number for output of formatted data.
  iprcel=govern the choice of preconditioner for the SCF cycle
  iscf=governs the choice of SCF algorithm, or non-SCF calculation.
  marrays=maximal number of arrays (or group of arrays) to be monitored.
  mbcg=number of MB needed for the cg array.
  mbdiskpd=number of MB needed to store a density or potential file on disk
  mbdiskwf=number of MB needed to store a wavefunction file on disk
  mbf_fftgr=number of MB needed for the f_fftgr array.
  mbgylm=number of MB needed for the pawfgrtab%gylm array (paw only)
  mffmem =governs the number of FFT arrays which are fit in core memory
  mpw   =maximum number of planewaves in basis sphere (large number)
  natom =number of atoms in unit cell
  nchain=number of chains to be used in the estimation of memory.
  nfft =(effective) number of FFT grid points (for one processor) (coarse grid, if PAW)
  nfftf=(effective) number of FFT grid points (for one processor) (fine grid, if PAW)
  occopt=option for occupation numbers. If 3<=occopt<=8, varying occupation
  option : if 0 , no test of available memory
           if 1 , the routine tries to allocate the estimated memory, for testing
                    purposes, and if a failure occurs, the routine stops.
           if 2 , like 1, but before stopping, the routine will provide
                    an estimation of the available memory.
  prtvol=control print volume

OUTPUT

  (only writing)

PARENTS

      memorf,memory

CHILDREN

      wrtout

SOURCE

subroutine memana(cadd,cfft,cfftf,chain,cmpw,dttyp,iout,iprcel,iscf,&
& marrays,mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm,mffmem,&
& mpw,natom,nchain,nfft,nfftf,occopt,option,prtvol)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'memana'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: iout,iprcel,iscf,marrays,mffmem,mpw,natom,nchain
 integer,intent(in) :: nfft,nfftf,occopt,option,prtvol
 real(dp),intent(in) :: mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm
!arrays
 integer,intent(in) :: dttyp(marrays)
 logical,intent(in) :: chain(marrays,nchain)
 real(dp),intent(in) :: cadd(marrays),cfft(marrays),cfftf(marrays),cmpw(marrays)

!Local variables-------------------------------
!scalars
 integer :: biggest,ichain,ier,ier1,ier2,ier3,ier4,ier5,ier6,ier7,ier8,ii
!integer :: jj,kk
 integer :: mu,nmbytes,nquarter_mbytes,quit
 logical :: known_iscf
 real(dp) :: mbbigarr,mbbiggest
 character(len=500) :: message
!arrays
 real(dp),allocatable :: bigarray(:,:),bigarray1(:,:),bigarray2(:,:)
 real(dp),allocatable :: bigarray3(:,:),bigarray4(:,:),bigarray5(:,:)
 real(dp),allocatable :: bigarray6(:,:),bigarray7(:,:),bigarray8(:,:)
 real(dp),allocatable :: cdpadd(:),cdpfft(:),cdpfftf(:),cdpmpw(:)
 real(dp),allocatable :: cintfft(:),cintfftf(:),cintmpw(:),cintadd(:)
 real(dp),allocatable :: mbdpadd(:),mbdpfft(:),mbdpfftf(:)
 real(dp),allocatable :: mbdpmpw(:),mbintadd(:),mbintfft(:),mbintfftf(:)
 real(dp),allocatable :: mbintmpw(:),mbother(:),mbtot(:)

! **************************************************************************

!Purpose: from the per-array block counts (cmpw, cfft, cfftf, cadd), the array
!datatypes (dttyp) and the chain membership table (chain), compute the memory
!needed by each chain, report the biggest one together with the disk space
!estimates, and, if option>=1, try to really allocate that amount of memory.
!
!Steps:
! 1) per chain : sum the block counts, convert them to Mbytes, find the maximum
! 2) print the memory report (detailed if prtvol>=10) and the disk estimates
! 3) option>=1 : test allocation of the biggest array, then of the total memory
! 4) option==2 and a test failed : probe the available memory, then stop
!
!The routine only writes; an allocation failure leads to an error of class
!"MemanaError" (immediately if option==1, after the probe if option==2).

!write(std_out,*)' memana : nchain=',nchain

 ABI_ALLOCATE(cdpfftf,(nchain))
 ABI_ALLOCATE(cdpfft,(nchain))
 ABI_ALLOCATE(cdpmpw,(nchain))
 ABI_ALLOCATE(cintfftf,(nchain))
 ABI_ALLOCATE(cintfft,(nchain))
 ABI_ALLOCATE(cintmpw,(nchain))
 ABI_ALLOCATE(cdpadd,(nchain))
 ABI_ALLOCATE(cintadd,(nchain))
 ABI_ALLOCATE(mbdpadd,(nchain))
 ABI_ALLOCATE(mbdpfftf,(nchain))
 ABI_ALLOCATE(mbdpfft,(nchain))
 ABI_ALLOCATE(mbdpmpw,(nchain))
 ABI_ALLOCATE(mbintadd,(nchain))
 ABI_ALLOCATE(mbintfftf,(nchain))
 ABI_ALLOCATE(mbintfft,(nchain))
 ABI_ALLOCATE(mbintmpw,(nchain))
 ABI_ALLOCATE(mbother,(nchain))
 ABI_ALLOCATE(mbtot,(nchain))

 biggest=0
 mbbiggest=0.0_dp

!For each chain, compute the number of bytes
 do ichain=1,nchain

!  First, the number of integer or real(dp), fft, mpw or add blocks
!  (only the arrays flagged as belonging to this chain are counted)
   cdpmpw(ichain) =sum(cmpw(:),MASK=(dttyp(:)==8).and.chain(:,ichain))
   cintmpw(ichain)=sum(cmpw(:),MASK=(dttyp(:)==4).and.chain(:,ichain))
   cdpfftf(ichain) =sum(cfftf(:),MASK=(dttyp(:)==8).and.chain(:,ichain))
   cintfftf(ichain)=sum(cfftf(:),MASK=(dttyp(:)==4).and.chain(:,ichain))
   cdpfft(ichain) =sum(cfft(:),MASK=(dttyp(:)==8).and.chain(:,ichain))
   cintfft(ichain)=sum(cfft(:),MASK=(dttyp(:)==4).and.chain(:,ichain))
   cdpadd(ichain) =sum(cadd(:),MASK=(dttyp(:)==8).and.chain(:,ichain))
   cintadd(ichain)=sum(cadd(:),MASK=(dttyp(:)==4).and.chain(:,ichain))

!  Compute the corresponding number of Mbytes
!  (8 bytes per real(dp) number, 4 bytes per integer, 1024**2 bytes per Mbyte)
   mbdpmpw(ichain) =8*cdpmpw(ichain) *dble(mpw) /1024._dp**2
   mbintmpw(ichain)=4*cintmpw(ichain)*dble(mpw) /1024._dp**2
   mbdpfftf(ichain) =8*cdpfftf(ichain) *dble(nfftf)/1024._dp**2
   mbintfftf(ichain)=4*cintfftf(ichain)*dble(nfftf)/1024._dp**2
   mbdpfft(ichain) =8*cdpfft(ichain) *dble(nfft)/1024._dp**2
   mbintfft(ichain)=4*cintfft(ichain)*dble(nfft)/1024._dp**2
   mbdpadd(ichain) =8*cdpadd(ichain)              /1024._dp**2
   mbintadd(ichain)=4*cintadd(ichain)             /1024._dp**2
!  Residue : a different estimate is used when 3<=occopt<=8
   mbother(ichain) =dble(231+6*natom)/1024._dp
   if(3<=occopt .and. occopt<=8)mbother(ichain)=dble(991+natom)/1024._dp

!  Compute the total number of Mbytes
   mbtot(ichain)=mbdpmpw(ichain)+mbintmpw(ichain)&
&   +mbdpfftf(ichain)+mbintfftf(ichain)&
&   +mbdpfft(ichain)+mbintfft(ichain)&
&   +mbdpadd(ichain)+mbintadd(ichain)+mbother(ichain)

!  Select the biggest chain
   if(mbtot(ichain)>mbbiggest)then
     mbbiggest=mbtot(ichain)
     biggest=ichain
   end if
 end do
!When iprcel<20, the biggest chains cannot be number 8 or 9 ...
 if(modulo(iprcel,100)<20 .and. (biggest==8 .or. biggest==9))then
!  The message reports the condition that is actually tested
   write(message,'(a,a,a,a,i3,a,a,a)') ch10,&
&   ' memana: BUG -',ch10,&
&   '  The biggest chain is number',biggest,' while modulo(iprcel,100)<20.',ch10,&
&   '  This is not allowed.'
   call wrtout(std_out,message,'COLL')
 end if

 write(message, '(a,f11.3,a)' ) &
& 'P This job should need less than                 ',&
& mbbiggest+tol10,' Mbytes of memory. '
 call wrtout(std_out,message,'COLL')
 call wrtout(iout,message,'COLL')

!Detailed report : the chain indices 1..10 below are hard-coded, so the caller
!is expected to provide the chains in this order
 if(prtvol>=10)then
   if(biggest==1)write(message,'(a)')'P Max. in main chain + fourwf.f '
   if(biggest==2)write(message,'(a)')'P Max. in main chain + nonlop.f + opernl.f '
   if(biggest==3)write(message,'(a)')'P Max. in XC chain '
   if(biggest==4)write(message,'(a)')'P Max. in mkrho chain '
   if(biggest==5)write(message,'(a)')'P Max. in fourdp chain '
   if(biggest==6)write(message,'(a)')'P Max. in parallel k-point chain '
   if(biggest==7)write(message,'(a)')'P Max. in newvtr chain '
   if(biggest==8)write(message,'(a)')'P Max. in suscep chain '
   if(biggest==9)write(message,'(a)')'P Max. in dielmt chain '
   if(biggest==10)write(message,'(a)')'P Max. in tddft chain '
   call wrtout(iout,message,'COLL')

   write(message, '(a,i13,a,f11.3,a)' )&
&   'P',nint(cintmpw(biggest)),' blocks of mpw  integer numbers, for',&
&   mbintmpw(biggest)+tol10,' Mbytes. '
   call wrtout(iout,message,'COLL')
   write(message, '(a,i13,a,f11.3,a)' )&
&   'P',nint(cdpmpw(biggest)),' blocks of mpw  real(dp)  numbers, for',&
&   mbdpmpw(biggest)+tol10,' Mbytes. '
   call wrtout(iout,message,'COLL')
   if (nfft==nfftf) then
!    Coarse and fine grids have the same size : report them together
     if(mbintfft(biggest)+mbintfftf(biggest)>0.001)then
       write(message, '(a,i13,a,f11.3,a)' )&
&       'P',nint(cintfft(biggest)+cintfftf(biggest)),' blocks of nfft integer numbers, for',&
&       mbintfft(biggest)+mbintfftf(biggest)+tol10,' Mbytes. '
       call wrtout(iout,message,'COLL')
     end if
     write(message, '(a,i13,a,f11.3,a)' )&
&     'P',nint(cdpfft(biggest)+cdpfftf(biggest)),' blocks of nfft real(dp)  numbers, for',&
&     mbdpfft(biggest)+mbdpfftf(biggest)+tol10,' Mbytes. '
     call wrtout(iout,message,'COLL')
   else
     if(mbintfftf(biggest)>0.001)then
       write(message, '(a,i13,a,f11.3,a)' )&
&       'P',nint(cintfftf(biggest)),' blocks of nfft (fine grid) integer numbers, for',&
&       mbintfftf(biggest)+tol10,' Mbytes. '
       call wrtout(iout,message,'COLL')
     end if
     write(message, '(a,i13,a,f11.3,a)' )&
&     'P',nint(cdpfftf(biggest)),' blocks of nfft (fine grid) real(dp)  numbers, for',&
&     mbdpfftf(biggest)+tol10,' Mbytes. '
     call wrtout(iout,message,'COLL')
     if(mbintfft(biggest)>0.001)then
       write(message, '(a,i13,a,f11.3,a)' )&
&       'P',nint(cintfft(biggest)),' blocks of nfft (coarse grid) integer numbers, for',&
&       mbintfft(biggest)+tol10,' Mbytes. '
       call wrtout(iout,message,'COLL')
     end if
     write(message, '(a,i13,a,f11.3,a)' )&
&     'P',nint(cdpfft(biggest)),' blocks of nfft (coarse grid) real(dp)  numbers, for',&
&     mbdpfft(biggest)+tol10,' Mbytes. '
     call wrtout(iout,message,'COLL')
   end if
   if(mbintadd(biggest)>0.001)then
     write(message, '(a,13x,a,f11.3,a)' )&
&     'P',' Additional     integer numbers, for',mbintadd(biggest)+tol10,' Mbytes. '
     call wrtout(iout,message,'COLL')
   end if
   write(message, '(a,13x,a,f11.3,a)' )&
&   'P',' Additional     real(dp)  numbers, for',mbdpadd(biggest)+tol10,' Mbytes. '
   call wrtout(iout,message,'COLL')
   write(message, '(a,13x,a,f11.3,a)' )&
&   'P',' With residue estimated to be       ',mbother(biggest)+tol10,' Mbytes. '
   call wrtout(iout,message,'COLL')
   write(message, '(a)' )'P'
   call wrtout(iout,message,'COLL')
   write(message, '(a)' )&
&   'P Comparison of the memory needs of different chains'
   call wrtout(iout,message,'COLL')

   write(message, '(a,f11.3,a)' )&
&   'P Main chain + fourwf.f           ',mbtot(1)+tol10,' Mbytes. '
   call wrtout(iout,message,'COLL')
   write(message, '(a,f11.3,a)' )&
&   'P Main chain + nonlop.f + opernl.f',mbtot(2)+tol10,' Mbytes. '
   call wrtout(iout,message,'COLL')

!  The next chains are not defined in the RF case.
   if(nchain>2)then
     write(message, '(a,f11.3,a)' )&
&     'P XC chain                        ',mbtot(3)+tol10,' Mbytes. '
     call wrtout(iout,message,'COLL')
     write(message, '(a,f11.3,a)' )&
&     'P mkrho chain                     ',mbtot(4)+tol10,' Mbytes. '
     call wrtout(iout,message,'COLL')
     write(message, '(a,f11.3,a)' )&
&     'P fourdp chain                    ',mbtot(5)+tol10,' Mbytes. '
     call wrtout(iout,message,'COLL')
     if(xmpi_paral==1)then
       write(message, '(a,f11.3,a)' )&
&       '- parallel k-point chain          ',mbtot(6)+tol10,' Mbytes. '
       call wrtout(iout,message,'COLL')
     end if
     write(message, '(a,f11.3,a)' )&
&     'P newvtr chain                    ',mbtot(7)+tol10,' Mbytes. '
     call wrtout(iout,message,'COLL')
     if(modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70)then
       write(message, '(a,f11.3,a)' )&
&       'P suscep chain                    ',mbtot(8)+tol10,' Mbytes. '
       call wrtout(iout,message,'COLL')
       write(message, '(a,f11.3,a)' )&
&       'P dielmt chain                    ',mbtot(9)+tol10,' Mbytes. '
       call wrtout(iout,message,'COLL')
     end if
     if(iscf==-1)then
       write(message, '(a,f11.3,a)' )&
&       'P tddft  chain                    ',mbtot(10)+tol10,' Mbytes. '
!      This line was formerly prepared but never written out
       call wrtout(iout,message,'COLL')
     end if
   end if ! nchain>2

 end if

!--------------------------------------------------------------------

 write(message, '(a)' ) &
& '  Rough estimation (10% accuracy) of disk space for files :'
 call wrtout(iout,message,'COLL')
 call wrtout(std_out,message,'COLL')

 write(message, '(a,f11.3,a,a,f11.3,a)' ) &
& '_ WF disk file :',mbdiskwf+tol10,' Mbytes ;',&
& ' DEN or POT disk file :',mbdiskpd+tol10,' Mbytes.'
 call wrtout(iout,message,'COLL')
 call wrtout(std_out,message,'COLL')

 if(mffmem==0 .and. iscf>0)then
!  known_iscf tells whether one of the branches below has filled message.
!  For any other value of iscf nothing is printed; otherwise the previous
!  message (the disk file estimate) would be written a second time.
   known_iscf=.true.
   if(iscf==1)then
     write(message, '(a,a,a)' )&
&     '  mffmem==0, iscf==1 => use of 1 FFT temporary disk file,',ch10,&
&     '                       5 times bigger than a DEN file.'
   else if(iscf==2.or.iscf==12)then
     write(message, '(a,a,a)' )&
&     '  mffmem==0, iscf==2 => use of 1 FFT temporary disk file,',ch10,&
&     '                       3 times bigger than a DEN file.'
   else if(iscf==3.or.iscf==13)then
     write(message, '(a,a,a)' )&
&     '  mffmem==0, iscf==3 => use of 1 FFT temporary disk file,',ch10,&
&     '                       4 times bigger than a DEN file.'
   else if(iscf==4.or.iscf==14)then
     write(message, '(a,a,a)' )&
&     '  mffmem==0, iscf==4 => use of 1 FFT temporary disk file,',ch10,&
&     '                       6 times bigger than a DEN file.'
   else if(iscf==5)then
     write(message, '(a,a,a)' )&
&     '  mffmem==0, iscf==5 => use of 1 FFT temporary disk file,',ch10,&
&     '                       10 times bigger than a DEN file.'
   else if(iscf==6)then
     write(message, '(a,a,a)' )&
&     '  mffmem==0, iscf==6 => use of 1 FFT temporary disk file,',ch10,&
&     '                       10 times bigger than a DEN file.'
   else if(iscf==7.or.iscf==17)then
     write(message, '(a,a,a)' )&
&     '  mffmem==0, iscf==7 => use of 1 FFT temporary disk file,',ch10,&
&     '                       (2+2*npulayit) times bigger than a DEN file.'
   else
     known_iscf=.false.
   end if
   if(known_iscf)then
     call wrtout(iout,message,'COLL')
     call wrtout(std_out,message,'COLL')
   end if
 end if

!Temporary message - estimation of PAW specific data has to be done...
!Have to add the usepaw argument to use this.
!if (usepaw==1) then
!write(message,'(5a)') '  WARNING: You are using PAW formalism;',ch10,&
!&       '           Above estimations do not take PAW',ch10,&
!&       '           specific data into account !'
!call wrtout(iout,message,'COLL')
!call wrtout(std_out,message,'COLL')
!end if

 write(message,'(80a,a)') ('=',mu=1,80),ch10
 call wrtout(iout,message,'COLL')
 call wrtout(std_out,message,'COLL')

!--------------------------------------------------------------------
!Here, each processor must test its memory, so use
!the PERS mode for error messages, followed by synchronisation

!mbbigarr is exactly one of its three arguments, so the equality tests
!below are safe; in case of a tie, cg is reported first, then f_fftgr
 mbbigarr=max(mbf_fftgr,mbcg,mbgylm)
 if(mbbigarr==mbcg) then
   write(message, '(a,f12.4,a)' ) &
&   ' Biggest array : cg(disk), with',mbcg+tol10,' MBytes.'
 else if (mbbigarr==mbf_fftgr) then
   write(message, '(a,f12.4,a)' ) &
&   ' Biggest array : f_fftgr(disk), with',mbf_fftgr+tol10,' MBytes.'
 else if (mbbigarr==mbgylm)then
   write(message, '(a,f12.4,a)' ) &
&   ' Biggest array : pawfgrtab%gylm(gr), with',mbgylm+tol10,' MBytes.'
 end if
 call wrtout(std_out,message,'COLL')

!if (mpi_enreg%my_nimage>1) then
!write(message, '(a,f12.4,a)' ) &
!&   ' These estimations take the distribution over replicas (images) of the cell into account.'
!call wrtout(std_out,message,'COLL')
!end if

!quit is set to 1 when an allocation test fails with option==2
 quit=0

 if(option>=1)then

!  Test the ability to allocate the biggest array
!  (one column of 32*1024 real(dp) numbers is a quarter of Mbyte;
!  the real value is truncated to an integer number of columns)
   nquarter_mbytes=4.0_dp*mbbigarr+1.0_dp
   ABI_STAT_ALLOCATE(bigarray,(32*1024,nquarter_mbytes), ier)
   if(ier/=0)then
     write(message,'(a,f11.3,a,a,a,a,a,a,a)')&
&     'Test failed to allocate an array of',mbbigarr,' Mbytes',ch10,&
&     'It is not worth to continue ',ch10,&
&     'Action: modify input variable to fit the available memory,',ch10,&
&     'increase limit on maximal array size or set mem_test to 0 to disable this test.'
     call wrtout(std_out,message,'PERS')
     if(option==1)then
       MSG_ERROR_CLASS(message, "MemanaError")
     else
       MSG_WARNING(message)
       quit=1
     end if
   end if
   if(quit==0)then
     write(message,'(a,f11.3,a)')&
&     ' memana : allocated an array of',mbbigarr+tol10,' Mbytes, for testing purposes. '
     call wrtout(std_out,message,'COLL')
   end if
   if(allocated(bigarray)) then
     ABI_DEALLOCATE(bigarray)
   end if

!  Test the ability to allocate the needed total memory : use 8 segments,
!  hoping that the maximal segment size is not so much smaller than the
!  total memory
!  (each segment holds mbbiggest/8 Mbytes, plus one extra quarter of Mbyte)
   nquarter_mbytes=0.5_dp*mbbiggest+1.0_dp
   ABI_STAT_ALLOCATE(bigarray1,(32*1024,nquarter_mbytes), ier1)
   ABI_STAT_ALLOCATE(bigarray2,(32*1024,nquarter_mbytes), ier2)
   ABI_STAT_ALLOCATE(bigarray3,(32*1024,nquarter_mbytes), ier3)
   ABI_STAT_ALLOCATE(bigarray4,(32*1024,nquarter_mbytes), ier4)
   ABI_STAT_ALLOCATE(bigarray5,(32*1024,nquarter_mbytes), ier5)
   ABI_STAT_ALLOCATE(bigarray6,(32*1024,nquarter_mbytes), ier6)
   ABI_STAT_ALLOCATE(bigarray7,(32*1024,nquarter_mbytes), ier7)
   ABI_STAT_ALLOCATE(bigarray8,(32*1024,nquarter_mbytes), ier8)

   if(ier1/=0 .or. ier2/=0 .or. ier3/=0 .or. ier4/=0 .or.&
&   ier5/=0 .or. ier6/=0 .or. ier7/=0 .or. ier8/=0) then
     write(message,'(a,f11.3,a,a,a,a,a,a,a)')&
&     'Test failed to allocate ',mbbiggest,' Mbytes',ch10,&
&     'It is not worth to continue ',ch10,&
&     'Action: modify input variables or submission parameters to fit the available memory,',ch10,&
&     'increase limit on available memory or set mem_test to 0 to disable this test.'
     if(option==1)then
       MSG_ERROR_CLASS(message, "MemanaError")
     else
       MSG_WARNING(message)
       quit=1
     end if
   end if

   if(quit==0)then
     write(message,'(a,f11.3,a,a,a)')&
&     ' memana: allocated ',mbbiggest,'Mbytes, for testing purposes. ',ch10,&
&     ' The job will continue.'
     call wrtout(std_out,message,'COLL')
   end if
!  Some of the segments might have been allocated even if the test failed
   if(allocated(bigarray1)) then
     ABI_DEALLOCATE(bigarray1)
   end if
   if(allocated(bigarray2)) then
     ABI_DEALLOCATE(bigarray2)
   end if
   if(allocated(bigarray3)) then
     ABI_DEALLOCATE(bigarray3)
   end if
   if(allocated(bigarray4)) then
     ABI_DEALLOCATE(bigarray4)
   end if
   if(allocated(bigarray5)) then
     ABI_DEALLOCATE(bigarray5)
   end if
   if(allocated(bigarray6)) then
     ABI_DEALLOCATE(bigarray6)
   end if
   if(allocated(bigarray7)) then
     ABI_DEALLOCATE(bigarray7)
   end if
   if(allocated(bigarray8)) then
     ABI_DEALLOCATE(bigarray8)
   end if

 end if

!--------------------------------------------------------------------

 if(option==2 .and. quit==1 )then

!  Estimation of the available memory
!
!  A quarter of Mbyte is 256*1024/8 real(dp) numbers,
!  that is 32*1024 dp numbers.
!  One begins with the allocation of 4 Mbytes. If successful,
!  one increases that number, until the allocation is not successfull
!  any more. Unfortunately, on a P6 with the pghpf compiler, the
!  allocate instruction generate a core dump, instead of returning
!  an error code, so that this part of code has been made optional.

   nquarter_mbytes=16
   nmbytes=nquarter_mbytes/4.0_dp

!  With an increase ratio of 1.25_dp (see below), ii=5 leads to 9 MB,
!  ii=10 leads to 28 MB, ii=15 leads to 85 MB, ii=18 leads to 165 MB,
!  ii=30 is over 2 GB
   do ii=1,30
     ABI_STAT_ALLOCATE(bigarray,(32*1024,nquarter_mbytes), ier)
     if(ier/=0)then
       write(message,'(a,i0,a)')' memana : failed to allocate ',nmbytes,' Mbytes'
       call wrtout(std_out,message,'PERS')
       exit
     end if
     write(message,'(a,i0,a)')' memana : succeeded to allocate ',nmbytes,' Mbytes'
     call wrtout(std_out,message,'PERS')
!    Here really test the space
!    do kk=1,nquarter_mbytes
!    do jj=1,32*1024,37
!    bigarray(jj,kk)=0.0_dp
!    end do
!    write(std_out,*)' memana : wrote ',kk,' quarter of mbytes'
!    end do
     ABI_DEALLOCATE(bigarray)
     nquarter_mbytes=dble(nquarter_mbytes)*1.25_dp
     nmbytes=nquarter_mbytes/4.0_dp
   end do
   if(allocated(bigarray)) then
     ABI_DEALLOCATE(bigarray)
   end if

!  The failed allocation test is fatal; the probe above is only informative
   MSG_ERROR_CLASS("in memana with option==2 .and. quit==1", "MemanaError")
 end if !  End the test of the available memory

!--------------------------------------------------------------------

 ABI_DEALLOCATE(cdpfftf)
 ABI_DEALLOCATE(cdpfft)
 ABI_DEALLOCATE(cdpmpw)
 ABI_DEALLOCATE(cintfftf)
 ABI_DEALLOCATE(cintfft)
 ABI_DEALLOCATE(cintmpw)
 ABI_DEALLOCATE(cdpadd)
 ABI_DEALLOCATE(cintadd)
 ABI_DEALLOCATE(mbdpadd)
 ABI_DEALLOCATE(mbdpfftf)
 ABI_DEALLOCATE(mbdpfft)
 ABI_DEALLOCATE(mbdpmpw)
 ABI_DEALLOCATE(mbintadd)
 ABI_DEALLOCATE(mbintfftf)
 ABI_DEALLOCATE(mbintfft)
 ABI_DEALLOCATE(mbintmpw)
 ABI_DEALLOCATE(mbother)
 ABI_DEALLOCATE(mbtot)

end subroutine memana

m_memeval/memorf [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

 memorf

FUNCTION

 Estimation of the memory needed for a response-function job.
 According to the value of the option variable,
 might also try to allocate this amount of memory, and if it fails,
 might estimate the available memory.

INPUTS

  cplex=1 or 2, indicate whether the den and pot functions are real or complex
  getcell=if non-zero, the values of acell and rprim are taken from
   the output of another dataset
  idtset=number of the current dataset
  intxc=control xc quadrature
  iout=unit number for output of formatted data.
  iprcel=govern the choice of preconditioner for the SCF cycle
  iscf=governs the choice of SCF algorithm, or non-SCF calculation.
  jdtset=index of the current dataset
  lmnmax=max. number of (l,m,n) components over all type of psps
  lnmax =max. number of (l,n)   components over all type of psps
  mband =maximum number of bands
  mffmem =governs the number of FFT arrays which are fit in core memory
  mgfft =maximum single fft dimension
  mkmems=number of k points which can fit in memory; set to 0 if use disk
    the three values correspond to mkmem, mkqmem and mk1mem
  mpi_enreg=information about MPI parallelization
  mpssang is 1+maximum angular momentum for nonlocal pseudopotential
  mpssoang is 1+maximum (spin*angular momentum) for nonlocal pseudopotential
  mpw   =maximum number of planewaves in basis sphere (large number)
  mqgrid=maximum dimension of grid of q values for psp representations
  natom =number of atoms in unit cell
  nband(nkpt*nsppol)=number of bands at each k point, for each polarization
  nfft  =(effective) number of FFT grid points (for one processor)
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  nkpt  =number of k points
  nloalg(3)=governs the choice of the algorithm for non-local operator.
  nspden=number of spin-density components
  nspinor=number of spinorial components of the wavefunctions
  nsppol=number of channels for spin-polarization (1 or 2)
  nsym  =number of symmetry elements in space group
  ntypat=number of types of atoms
  n1xccc=dimension of xccc1d ; 0 if no XC core correction is used
  occopt=option for occupation numbers. If 3<=occopt<=7, varying occupation
  optddk=1 if ddk is computed during run
  optphon=1 if phonons are computed during run
  option : if 0 , no test of available memory
           if 1 , the routine tries to allocate the estimated memory, for testing
                    purposes, and if a failure occurs, the routine stops.
           if 2 , like 1, but before stopping, the routine will provide
                    an estimation of the available memory.
  optstrs=1 if strain perturbation is computed during run
  prtvol=control print volume
  useylm=governs the way the nonlocal operator is to be applied:
         1=using Ylm, 0=using Legendre polynomials
  use_gpu_cuda=1 if Cuda (GPU) is on
  xclevel= level of the XC functional

OUTPUT

  (only writing)

NOTES

 For the estimation, only those arrays that have some probability of
 being larger than 1000*8 bytes are taken into account :
 - All the arrays that have large numbers as one of their dimensions
 (mqgrid, mpw, nfft, ngfft(4)*ngfft(5)*ngfft(6),n1xccc
                                      or a constant larger than 1000)
 - All the arrays that have a product of two moderately large numbers
 (potential size above 30  : mband, mgfft, mkmems, natom, nkpt, nsym,
  or a constant larger than 30)
 After this estimation, an amount of (176 + 55 + 6*natom) Kbytes is added
 to take into account the static arrays declared
 in rhotoxc and daughter routines (at maximum 22*1000 dp numbers),
 as well as other arrays like
 character(len=500) :: message (present in about 100 routines), or the different
 arrays allocated in move.f, brdmin.f, gstate.f (xf array) or pspini.f
 In the case 3<=occopt<=7 this amount is increased by 760 Kbytes
 to take into account the arrays smdfun, occfun, entfun, workfun and xgrid,
 declared in getnel

 The current version takes into account only :
 1) and 2) the "main chain" in its two slightly different versions :
 driver - respfn - dfpt_looppert - dfpt_scfcv - dfpt_vtorho - dfpt_vtowfk -
     dfpt_cgwf - getghc - fourwf or (nonlop+opernl)

 Also, it is assumed that the potentials are non-local, even if there
     are local ! It would be necessary to update this routine
     now that the beginning of psp files is read before
     the present call (XG 980502)

 Some BIG approximations, not present in the corresponding GS routine,
  have been made : nsym=nsym1, nkpt=nkpt_rbz, mpw=mpw1 ...

PARENTS

      memory_eval

CHILDREN

      memana,wrtout

SOURCE

subroutine memorf(cplex,n1xccc,getcell,idtset,intxc,iout,iprcel,&
& iscf,jdtset,lmnmax,lnmax,mband,mffmem,mgfft,&
& mkmems,mpi_enreg,mpsang,mpssoang,mpw,mqgrid,&
& natom,nband,nfft,ngfft,&
& nkpt,nloalg,nspden,nspinor,nsppol,nsym,ntypat,&
& occopt,optddk,optphon,option,optstrs,prtvol,useylm,use_gpu_cuda,xclevel)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'memorf'
!End of the abilint section

 implicit none

!Purpose --------------------------------------
!Estimate the memory needed by a response-function (RF) job.
!The routine
! 1) echoes the parameters that drive the memory need on units iout and std_out,
! 2) fills, for every monitored array (or group of arrays), the number of
!    blocks of size mpw (cmpw), of size nfft (cfft), the additional storage
!    (cadd) and the size in bytes of one element (dttyp),
! 3) defines two chains of routines (fourwf or nonlop+opernl at the bottom),
! 4) delegates the analysis and the printing to memana.
!Nothing is returned to the caller.
!NOTE: use_gpu_cuda is kept in the interface for the callers, but is not
!used in the estimation.

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: cplex,getcell,idtset,intxc,iout,iprcel,iscf
 integer,intent(in) :: jdtset,lmnmax,lnmax,mband,mffmem,mgfft,mpsang
 integer,intent(in) :: mpssoang,mpw,mqgrid,n1xccc,natom,nfft,nkpt
 integer,intent(in) :: nspden,nspinor,nsppol,nsym,ntypat,occopt
 integer,intent(in) :: optddk,option,optphon,optstrs,prtvol,useylm
 integer,intent(in) :: use_gpu_cuda,xclevel
 type(MPI_type),intent(in) :: mpi_enreg
!arrays
 integer,intent(in) :: mkmems(3),nband(nkpt*nsppol),ngfft(18)
 integer,intent(in) :: nloalg(3)

!Local variables-------------------------------
!marrays= maximal number of arrays to be monitored (or group of arrays)
!cmpw(marrays)=count of blocks of size mpw bytes
!cfft(marrays)=number of blocks of size nfft bytes
!cadd(marrays)=additional storage needed (in bytes)
!dttyp(marrays)=datatype of the array : 4 for integers, 8 for real(dp)
!nchain= number of different chains of routines
!chain(marrays,nchain)=different chains of routines
!augfft=number of padding elements of one augmented FFT array,
!       ngfft(4)*ngfft(5)*ngfft(6)-nfft, held in real(dp) to avoid integer overflow
!rmpw=mpw converted to real(dp), used in the largest products to avoid integer overflow
!scalars
 integer,parameter :: marrays=150,nchain=2
 integer :: matblk,maxmkmem,mincat,mk1mem,mkmem,mkqmem,mu,n_fftgr
 integer :: nprocwf
 integer :: my_natom
 real(dp) :: augfft,mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm,rmpw
 character(len=500) :: message
!arrays
 integer :: dttyp(marrays)
 real(dp) :: cadd(marrays),cfft(marrays),cmpw(marrays)
 real(dp),allocatable :: cfft_dum(:)
 logical :: chain(marrays,nchain)

! **************************************************************************

 if(option<0 .or. option>2)then
   write(message, '(a,i0,a)')'option= ',option,' while the only allowed values are 0, 1, or 2.'
   MSG_BUG(message)
 end if

 cmpw(:)=zero ; cfft(:)=zero ; cadd(:)=zero
 dttyp(:)=0

 call wrtout(std_out,' memorf : analysis of memory needs ','COLL')

!Header line : the width of the dataset index depends on its magnitude
 if(jdtset>=100)then
   write(message,'(80a,a,a,i5,a)')('=',mu=1,80),ch10,&
&   ' Values of the parameters that define the memory need for DATASET',jdtset,&
&   ' (RF).'
 else if(jdtset/=0)then
   write(message,'(80a,a,a,i3,a)')('=',mu=1,80),ch10,&
&   ' Values of the parameters that define the memory need for DATASET',jdtset,&
&   ' (RF).'
 else
   write(message,'(80a,a,a,a)')('=',mu=1,80),ch10,&
&   ' Values of the parameters that define the memory need of the present run',&
&   ' (RF).'
 end if
 call wrtout(iout,message,'COLL')
 call wrtout(std_out,message,'COLL')

!The three entries of mkmems are mkmem, mkqmem and mk1mem, in this order
 mkmem=mkmems(1)
 mkqmem=mkmems(2)
 mk1mem=mkmems(3)
!Number of atoms treated by this processor when atoms are distributed
 my_natom=natom;if (mpi_enreg%nproc_atom>1) my_natom=mpi_enreg%my_natom

 write(message,'( 4(a,i8),a,4(a,i8) )' ) &
& '     intxc =',intxc   ,'      iscf =',iscf,&
& '    lmnmax =',lmnmax  ,'     lnmax =',lnmax,ch10,&
& '     mgfft =',mgfft,'  mpssoang =',mpssoang,&
& '    mqgrid =',mqgrid,'     natom =',natom
 call wrtout(iout,message,'COLL')
 call wrtout(std_out,message,'COLL')

 write(message,'( 4(a,i8),a,4(a,i8),a,4(a,i8) )' ) &
& '  nloc_mem =',nloalg(2)*(nloalg(3)+1),'    nspden =',nspden ,&
& '   nspinor =',nspinor,'    nsppol =',nsppol ,ch10,&
& '      nsym =',nsym,'    n1xccc =',n1xccc ,&
& '    ntypat =',ntypat,'    occopt =',occopt ,ch10,&
& '   xclevel =',xclevel
 call wrtout(iout,message,'COLL')
 call wrtout(std_out,message,'COLL')

 write(message,'(4(3(a,i12),a))') &
& '-    mband =',mband  ,'        mffmem =',mffmem,&
& '         mkmem =',mkmem  ,ch10,&
& '-   mkqmem =',mkqmem ,'        mk1mem =',mk1mem,&
& '           mpw =',mpw  ,ch10,&
& '      nfft =',nfft ,'          nkpt =',nkpt
 call wrtout(iout,message,'COLL')
 call wrtout(std_out,message,'COLL')

 if (my_natom/=natom)then
   write(message,'(a,i10)') 'Pmy_natom=',my_natom
   call wrtout(iout,message,'COLL')
   call wrtout(std_out,message,'COLL')
 end if

 write(message,'(80a)') ('=',mu=1,80)
 call wrtout(iout,message,'COLL')
 call wrtout(std_out,message,'COLL')

!Warn the user when the cell is read from another dataset : the estimation
!below is then based on the wrong acell and rprim
 if(getcell>0 .or. (getcell<0 .and. idtset+getcell>0) )then
   write(message,'(a,a,a,a,a,a,i3,a,i3,a,a,a,a,a,a)' )ch10,&
&   ' memorf : COMMENT -',ch10,&
&   '  The determination of memory needs at this stage is meaningless,',ch10,&
&   '  since getcell = ',getcell,' is non-zero, while idtset=',idtset,'.',ch10,&
&   '  The following numbers are obtained by supposing that acell and rprim',ch10,&
&   '  are NOT taken from a previous dataset. You cannot rely on them.',ch10
   call wrtout(iout,message,'COLL')
   call wrtout(std_out,message,'COLL')
 end if

!Number of FFT-sized work arrays kept by the SCF mixing algorithm
 n_fftgr=1
 if(iscf==1)            n_fftgr=5
 if(iscf==2.or.iscf==3) n_fftgr=4
 if(iscf==5.or.iscf==6) n_fftgr=10

!Quantities evaluated in real(dp), because the corresponding integer
!products can exceed the range of default integers for large jobs
 augfft=real(ngfft(4),dp)*ngfft(5)*ngfft(6)-nfft
 rmpw=real(mpw,dp)

!(0)                     in main, driver, and respfn -------------------
!indsym (respfn)
 cadd(1)=4*nsym*natom          ; dttyp(1)=4
!rhor,rhog (respfn)
 cfft(2)=nspden+2              ; dttyp(2)=8
!occ (driver), doccde (respfn)
 cadd(3)=2*mband*nkpt*nsppol   ; dttyp(3)=8
!qgrid,vlspl,ffspl (driver)
 cadd(4)=mqgrid*(1+2*ntypat*(1+lnmax))   &
& ; dttyp(4)=8
!xccc1d (driver)
 cadd(5)=n1xccc*6*ntypat       ; dttyp(5)=8
!vtrial (respfn)
 cfft(6)=nspden                ; dttyp(6)=8
!kxc (respfn)
 cfft(7)=2*nspden-1            ; dttyp(7)=8

!(1-2)                   in dfpt_looppert --------------------------------------
!ph1d
 cadd(11)=2*3*(2*mgfft+1)*natom ; dttyp(11)=8
!vpsp1
 cfft(12)=cplex                ; dttyp(12)=8
!indsy1  assume that nsym=nsym1
 cadd(13)=4*nsym*natom         ; dttyp(13)=4
!irrzonr1 and phnons1  assume that nsym=nsym1
!The last dimension of these arrays is (nspden/nsppol)-3*(nspden/4), that is
!2 for nspden=2 with nsppol=1, and 1 in the other cases. It must be positive.
 if(nsym/=1)then
   cfft(14)=(2+(nspden/4))*((nspden/nsppol)-3*(nspden/4))   ; dttyp(14)=4
   cfft(15)=2*((nspden/nsppol)-3*(nspden/4))                ; dttyp(15)=8
 end if
!doccde_rbz, eigen0, eigenq, occ_rbz, docckqde, occkq, resid
!assume that nkpt=nkpt_rbz
 cadd(16)=7*mband*nkpt*nsppol  ; dttyp(16)=8
!kg
 cmpw(18)=3*mkmem              ; dttyp(18)=4
!cg
 cmpw(19)=2*nspinor*mband*mkmem*nsppol  ; dttyp(19)=8
!kg1
 cmpw(21)=3*mk1mem             ; dttyp(21)=4
!cgq
 cmpw(22)=2*nspinor*mband*mkqmem*nsppol  ; dttyp(22)=8
!cg1
 cmpw(23)=2*nspinor*mband*mk1mem*nsppol  ; dttyp(23)=8
!rhor1,rhog1
 cfft(24)=cplex*nspden+2       ; dttyp(24)=8
!eigen1
!assume that nkpt=nkpt_rbz
 cadd(25)=2*real(mband,dp)*mband*nkpt*nsppol      ; dttyp(25)=8
!ylm
 cmpw(26)=mkmem*mpsang*mpsang*useylm     ; dttyp(26)=8

!(3)                     in dfpt_scfcv --------------------------------------

!vhartr1,vtrial1,vxc
 cfft(31)=cplex+cplex*nspden+nspden      ; dttyp(31)=8
 if(iscf>0)then
!  f_fftgr
   cfft(32)=cplex*nspden*n_fftgr*mffmem    ; dttyp(32)=8
 end if

!(4)                   in dfpt_vtorho----------------------------------------

!proc_distrb
 cadd(41)=nkpt*mband*nsppol    ; dttyp(41)=4
!kg_k,kg1_k
 cmpw(42)=6                    ; dttyp(42)=4
!rhoaug1, vlocal, vlocal1
 cfft(43)=2*cplex+1            ; dttyp(43)=8
 cadd(43)=(2*cplex+1)*augfft

 if(mkqmem==0)then
!  cgq_disk
   cmpw(45)=2*nspinor*mband      ; dttyp(45)=8
 end if
!doccde_k,doccde_kq,eig0_k, ..., eig1_k, rocceig
 cadd(47)=(14+3*mband)*mband   ; dttyp(47)=8
!ylm_k,ylm1_k
 cmpw(49)=2*mpsang*mpsang*useylm  ; dttyp(49)=8

!(5)                     in dfpt_vtowfk --------------------------------------

!dkinpw,kinpw1
 cmpw(51)=2                    ; dttyp(51)=8
!ffnlk,ffnl1,ffnlkq
 cmpw(52)=2*(ntypat+2)*lmnmax  ; dttyp(52)=8
!ghc,gvnlc,gvnl1
 cmpw(53)=6*nspinor            ; dttyp(53)=8
!ph3d
 matblk=NLO_MINCAT
 if(nloalg(2)<=0)matblk=natom
 cmpw(54)=2*matblk             ; dttyp(54)=8
!wfraug,wfraug1,rhoaug
 cfft(55)=5                    ; dttyp(55)=8
 cadd(55)=5*augfft
!cwavef,cwave0,cwave1
 cmpw(56)=6*nspinor            ; dttyp(56)=8

!(6)                     in dfpt_cgwf ----------------------------------------

!gh1, gh_direc, gvnl_direc, conjgr, direc, vresid, cwaveq
 cmpw(61)=14*nspinor            ; dttyp(61)=8

!(9a)                    in getghc and fourwf----------------------------

!work (in getghc) : the padding belongs to entry 91, like cfft(91)
 cfft(91)=2                    ; dttyp(91)=8
 cadd(91)=2*augfft
!work1 (in fourwf)
 cfft(92)=2                    ; dttyp(92)=8
 cadd(92)=2*augfft

!(9b)                    in getghc, nonlop and opernl--------------------
 mincat=min(NLO_MINCAT,natom-ntypat+1)
 if (useylm==0) then                          ! ===== nonlop_pl
!  gxa  (in nonlop)
   cadd(94)=2*20*mincat*2       ; dttyp(94)=8
!  dgxdt  (in nonlop)
   cadd(95)=2*3*20*mincat*2    ; dttyp(95)=8
!  dgxds  (in nonlop)
   cadd(96)=2*56*mincat*2      ; dttyp(96)=8
!  teffv (in opernl4 - no distinction is made for opernl, opernl2 or opernl3)
!  kpgx, ffkg
!  here, evaluate an upper value, with nproj=2, p,d and f orbitals, but not
!  considering the stress, since it will be called outside of the main chain
   cadd(97)=NLO_MBLKPW*40        ; dttyp(97)=8
!  kpg if nloalg(3)=1
   cadd(98)=3*mpw*nloalg(3)     ; dttyp(98)=8
 else                                        ! ===== nonlop_ylm
!  gx + gxfac
   cadd(94)=2*2*rmpw*lmnmax*mincat    ; dttyp(94)=8
!  dgxdt + dgxdtfac + d2gxdt
!  (the last matching test wins : strain > phonon > ddk alone)
   if (optddk>0.and.optphon==0.and.optstrs==0) cadd(95)=2*2*rmpw*lmnmax*mincat
   if (optphon>0) cadd(95)=12*2*rmpw*lmnmax*mincat
   if (optstrs>0) cadd(95)=72*2*rmpw*lmnmax*mincat
   dttyp(95)=8
!  kpg
   cadd(96)=2*3*rmpw       ; dttyp(96)=8
   if (optphon>0) cadd(96)=cadd(96)+2*6*rmpw
!  miscellaneous: indlmn_typ, ffnl_typ
   cadd(97)=lmnmax*(6+rmpw*(2+optstrs)); dttyp(97)=8
!  opernla_ylm: scalar,scali,scalarr,scalari
   cadd(98)=2*rmpw+2*rmpw
   if (optddk>0.and.optstrs==0) cadd(98)=cadd(98)+2*rmpw
   if (optstrs>0) cadd(98)=cadd(98)+9*2*rmpw
   dttyp(98)=8
 end if

!--------------------------------------------------------------------------

!By default every monitored entry belongs to both chains
 chain(:,:)=.true.

!Define the main chain version a (fourwf) : drop the nonlop+opernl entries
 chain(93:100,1)=.false.

!Define the main chain version b (nonlop+opernl) : drop the fourwf entries
 chain(91:92,2)=.false.

!The memory needed for each chain has been computed
!-------------------------------------------------------------------------
!Still need some auxiliary data : estimate the disk space
!or the maximum segment size.

!The wavefunction file size is scaled by the number of processors
!of the FFT group (mpi_enreg%nproc_fft)
 nprocwf=mpi_enreg%nproc_fft

!All sizes below are in Mbytes, and are computed in real(dp) arithmetic
 mbdiskwf=(16._dp*rmpw*real(nprocwf,dp)*real(sum(nband(1:nkpt*nsppol)),dp))/1024._dp**2 + 0.002_dp
 mbdiskpd=(8._dp*real(nfft,dp)*real(nsppol,dp))/1024._dp**2 + 0.002_dp

!Determine the largest array out of cg,cg1,cgq, cg_disk or f_fftgr (f_fftgr_disk)
 if(mkmem==0 .and. mk1mem==0 .and. mkqmem==0)then
   mbcg=(16._dp*rmpw*real(nspinor,dp)*real(mband,dp))/1024._dp**2 + 0.002_dp
 else
   maxmkmem=maxval(mkmems(:))
   mbcg=(16._dp*rmpw*real(nspinor,dp)*real(mband,dp)*real(maxmkmem,dp)*real(nsppol,dp))/1024._dp**2 + 0.002_dp
 end if
 if(mffmem==0)then
   mbf_fftgr=(8._dp*real(cplex,dp)*real(nfft,dp)*real(n_fftgr,dp))/1024._dp**2 + 0.002_dp
 else
   mbf_fftgr=(8._dp*real(cplex,dp)*real(nfft,dp)*real(n_fftgr,dp)*real(nspden,dp)*real(mffmem,dp))/1024._dp**2 + 0.002_dp
 end if

!---------------------------------------------------------------------
!Now, analyze the data

!There is no separate fine FFT grid here : the fine-grid counters passed to
!memana are all zero, and nfft is given for both grid sizes
 ABI_ALLOCATE(cfft_dum,(marrays))
 cfft_dum=zero
 mbgylm=zero
 call memana(cadd,cfft,cfft_dum,chain,cmpw,dttyp,iout,iprcel,iscf,&
& marrays,mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm,mffmem,&
& mpw,natom,nchain,nfft,nfft,occopt,option,prtvol)
 ABI_DEALLOCATE(cfft_dum)

end subroutine memorf

m_memeval/memory [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

 memory

FUNCTION

 Estimation of the memory needed for a ground-state job.
 According to the value of the option variable,
 might also try to allocate this amount of memory, and if it fails,
 might estimate the available memory.

INPUTS

  extrapwf=flag controlling the extrapolation of wave functions during MD or relaxation
  getcell=if non-zero, the values of acell and rprim are taken from
   the output of another dataset
  idtset=number of the current dataset
  icoulomb=0 for periodic Fourier calculation of Hartree potential; 1 for isolated system using Poisson solver.
  intxc=control xc quadrature
  ionmov=control force calculations
  iout=unit number for output of formatted data.
  densfor_pred=govern the choice of density prediction and/or forces correction
  iprcel=govern the choice of preconditioner for the SCF cycle
  iscf=governs the choice of SCF algorithm, or non-SCF calculation.
  jdtset=index of the current dataset
  lmnmax=max. number of (l,m,n) components over all type of psps
  lnmax =max. number of (l,n)   components over all type of psps
  mband =maximum number of bands
  mffmem =governs the number of FFT arrays which are fit in core memory
  mgfftf =maximum single fft dimension (fine grid, if PAW)
  mgfft  =maximum single fft dimension (coarse grid, if PAW)
  mgfftdiel =maximum single fft dimension for susceptibility and dielectric
   matrices.
  mkmem =maximum number of k points which can fit in core memory
  mpi_enreg=information about MPI parallelization
  mpsang=1+maximum angular momentum for nonlocal pseudopotential
  mpssoang is 1+maximum (spin*angular momentum) for nonlocal pseudopotential
  mpw   =maximum number of planewaves in basis sphere (large number)
  mqgrid_ff=dimension of q (or G) grid for nl form factors (array ffspl)
  mqgrid_vl=dimension of q (or G) grid for Vloc (array vlspl)
  natom =number of atoms in unit cell
  nband(nkpt*nsppol)=number of bands at each k point, for each polarization
  nfftf =number of fft grid points for density        (fine grid, if PAW)
  nfft  =number of fft grid points for wavefunctions  (coarse grid, if PAW)
  nfftdiel  =maximum number of fft grid points for susceptibility
    and dielectric matrices
  ngfftf(18)=contain all needed information about 3D FFT (fine grid, if PAW)
  ngfft(18) =contain all needed information about 3D FFT (coarse grid, if PAW)
  ngfftdiel(18)=contain all needed information about 3D FFT, dielectric case,
                 see ~abinit/doc/variables/vargs.htm#ngfft
    for susceptibility and dielectric matrices
  nimage=number of images (replicas) of the cell
  nkpt  =number of k points
  npsp=number of different pseudopotentials
  npwdiel=number of plane wave for susceptibility and dielectric matrix
  npulayit=number of iterations used in Pulay SCF mixing
  nloalg(3)=governs the choice of the algorithm for non-local operator.
  nspden=number of spin-density components
  nspinor=number of spinorial components of the wavefunctions
  nsppol=number of channels for spin-polarization (1 or 2)
  nsym  =number of symmetry elements in space group
  ntypat =number of types of atoms
  n1xccc=dimension of xccc1d ; 0 if no XC core correction is used
  occopt=option for occupation numbers. If 3<=occopt<=8, varying occupation
  optforces=1 if forces are computed during run
  option : if 0 , no test of available memory
           if 1 , the routine tries to allocate the estimated memory, for testing
                    purposes, and if a failure occurs, the routine stops.
           if 2 , like 1, but before stopping, the routine will provide
                    an estimation of the available memory.
  optstress=1 if stresses are computed during run
  pawcpxocc=2 if PAW occupancies (rhoij) are complex
  pawmixdg=1 if mixing (in PAW) is done on the fine grid
  pawnhatxc=1 if nhat PAW density has to be analytically included in XC
  pawspnorb=1 when spin-orbit is activated within PAW
  pawstgylm=1 if g_l(r).Y_lm(r) factors are stored in memory (PAW)
  prtvol=control print volume
  pspheads(npsp)=<type pspheader_type>all the important information from the header
  tfkinfunc=flag controlling the use of the Thomas-Fermi algorithm (without WF)
  typat(natom)=type of each atom
  ucvol= unit cell volume
  usepaw= 0 for non paw calculation; =1 for paw calculation
  useylm=governs the way the nonlocal operator is to be applied:
         1=using Ylm, 0=using Legendre polynomials
  use_gpu_cuda=1 if Cuda (GPU) is on
  xclevel=XC functional level

OUTPUT

  (only writing)

NOTES

 For the estimation, only those arrays that have some probability of
 being larger than 1000*8 bytes are taken into account :
 - All the arrays that have large numbers as one of their dimensions
 (mqgrid, mpw, nfft, ngfft(4)*ngfft(5)*ngfft(6),
                     ngfftdiel(4)*ngfftdiel(5)*ngfftdiel(6), n1xccc
                                      or a constant larger than 1000)
 - All the arrays that have a product of two moderately large numbers
 (potential size above 30  : mband, mgfft, mkmem, natom, nkpt, nsym,
  or a constant larger than 30)
 After this estimation, an amount of (176 + 55 + 6*natom) Kbytes is added
 to take into account the static arrays declared
 in rhotoxc and daughter routines (at maximum 22*1000 dp numbers),
 as well as other arrays like
 character(len=500) :: message (present in about 100 routines), or the different
 arrays allocated in move.f, brdmin.f, gstate.f (xf array) or pspini.f
 In the case 3<=occopt<=8 this amount is increased by 760 Kbytes
 to take into account the arrays smdfun, occfun, entfun, workfun and xgrid,
 declared in getnel

 The current version takes into account
 1) and 2) the "main chain" in its two slightly different versions :
 driver - gstate - (move or brdmin) - scfcv - vtorho - vtowfk -
 3) the xc chain :
 driver - gstate - (move or brdmin) - scfcv - (vresfo) - rhotoxc - xcden
 4) the mkrho chain :
 driver - gstate - (move or brdmin) - scfcv - vtorho - mkrho
 5) the fourdp chain :
 driver - gstate - (move or brdmin) - scfcv - vtorho
         ( + ftofr - fourdp - symrhg )
 6) the parallel k-point chain :
 driver - gstate - (move or brdmin) - scfcv - vtorho - MPI_ALLREDUCE
 7) the newvtr chain :
 driver - gstate - (move or brdmin) - scfcv - newvtr
 8) the susceptibility chain :
 driver - gstate - (move or brdmin) - scfcv - vtorho - suscep - suskmm
 9) the dielectric chain :
 driver - gstate - (move or brdmin) - scfcv - vtorho - dielmt
 10) the tddft chain :
 driver - gstate - (move or brdmin) - scfcv - vtorho - tddft

 It is valid for all values of iscf, but not for nstep=0 (when the chain
     goes through energy instead of vtorho).

 Also, it is assumed that the potentials are non-local, even if there
     are local ! It would be necessary to update this routine
     now that the beginning of psp files is read before
     the present call (XG 980502)

 One might also estimate if there must be a chain arriving at :
  strnps , mkffnl, mkcore, mklocl, mkrho, prcpot, irrzg, initro,
  clnup1.
 This is because there are allocated arrays in these routines.

PARENTS

      memory_eval

CHILDREN

      memana,wrtout

SOURCE

 500 subroutine memory(n1xccc,extrapwf,getcell,idtset,icoulomb,intxc,ionmov,iout,densfor_pred,iprcel,&
 501 & iscf,jdtset,lmnmax,lnmax,&
 502 & mband,mffmem,mgfft,mgfftdiel,mgfftf,mkmem,mpi_enreg,mpsang,mpssoang,mpw,mqgrid_ff,mqgrid_vl,&
 503 & natom,nband,nfft,nfftdiel,nfftf,ngfft,ngfftdiel,ngfftf,nimage,&
 504 & nkpt,nloalg,npsp,npulayit,npwdiel,nspden,nspinor,nsppol,nsym,ntypat,&
 505 & occopt,optforces,option,optstress,pawcpxocc,pawmixdg,pawnhatxc,pawspnorb,pawstgylm,&
 506 & prtvol,pspheads,tfkinfunc,typat,ucvol,usepaw,useylm,use_gpu_cuda,xclevel)
 507 
 508 
 509 !This section has been created automatically by the script Abilint (TD).
 510 !Do not modify the following lines by hand.
 511 #undef ABI_FUNC
 512 #define ABI_FUNC 'memory'
 513 !End of the abilint section
 514 
 515  implicit none
 516 
 517 !Arguments ------------------------------------
 518 !scalars
 519  integer,intent(in) :: extrapwf,getcell,icoulomb,idtset,intxc,ionmov,iout,densfor_pred
 520  integer,intent(in) :: iprcel,iscf,jdtset,lmnmax,lnmax,mband,mffmem,mgfft
 521  integer,intent(in) :: mgfftdiel,mgfftf,mkmem,mpsang,mpssoang,mpw,mqgrid_ff
 522  integer,intent(in) :: mqgrid_vl,n1xccc,natom,nfft,nfftdiel,nfftf,nimage,nkpt,npsp
 523  integer,intent(in) :: npulayit,npwdiel,nspden,nspinor,nsppol,nsym,ntypat
 524  integer,intent(in) :: occopt,optforces,option,optstress
 525  integer,intent(in) :: pawcpxocc,pawmixdg,pawnhatxc,pawspnorb,pawstgylm
 526  integer,intent(in) :: prtvol,tfkinfunc,usepaw,useylm,use_gpu_cuda,xclevel
 527  real(dp) :: ucvol
 528  type(MPI_type),intent(in) :: mpi_enreg
 529 !arrays
 530  integer,intent(in) :: nband(nkpt*nsppol),ngfft(18),ngfftdiel(18),ngfftf(18)
 531  integer,intent(in) :: nloalg(3),typat(natom)
 532  type(pspheader_type) :: pspheads(npsp)
 533 
 534 !Local variables-------------------------------
 535 !marrays=maximal number of arrays to be monitored (or group of arrays)
 536 !cmpw(marrays)=count of blocks of size mpw bytes
 537 !cfft(marrays) =count of blocks of size nfft bytes (coarse grid, if PAW)
 538 !cfftf(marrays)=count of blocks of size nfft bytes (fine grid, if PAW)
 539 !cadd(marrays)=count of additional storage needed (in bytes)
 540 !dttyp(marrays)=datatype of the array : 4 for integers, 8 for real(dp)
 541 !nchain=number of different chains of routines
 542 !chain(marrays,nchain)=different chains of routines
 543  ! The cfoo arrays are used to store the allocated memory in the different
 544  ! routines of the program. Each stack of the program can allocate some
 545  ! memory and the amount is estimated and stored in cfoo(i). The lower i,
 546  ! the higher routine. cfft is memory used by FFT handling, cmpw for
 547  ! plane waves storage and cadd for miscellaneous memory occupation.
 548  ! The unit is the multiplier of the size of nfft for cfft, the multiplier
 549  ! of mpw for cmpw and the actually allocated memory for cadd.
 550  ! This array stores the size of each chunk of memory (8 for double
 551  ! floating point precision, 4 for integers and so on).
 552  ! This array defines if the chain defined above allocate or not the
 553  ! memory (depending on options).
 554 !scalars
 555  integer,parameter :: marrays=150,nchain=10
 556  integer :: fftalgb,histsz,ii,iscf10,jj,l_max,l_size_max,matblk,mblk,mincat,mu
 557  integer :: my_natom,n_fftgr,narr_fourdp,nbnd_in_blk,ndiel4,ndiel456,ndiel5,ndiel6
 558  integer :: ngrad,nprocwf,nspgrad,rhoij_nspden
 559  real(dp) :: mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm
 560  character(len=500) :: message
 561 ! character(len=1) :: firstchar
 562 !arrays
 563  integer :: dttyp(marrays),nattyp(ntypat)
 564  integer,allocatable :: basis_size(:),l_size(:),lmn2_size(:),lmn_size(:)
 565  integer,allocatable :: mesh_size(:),my_nattyp(:),pawver(:),shape_type(:)
 566  real(dp) :: cadd(marrays),cfft(marrays),cfftf(marrays),cmpw(marrays)
 567  real(dp),allocatable :: rshp(:)
 568  logical :: chain(marrays,nchain)
 569 
 570 ! **************************************************************************
 571 
 572  if(option<0 .or. option>2)then
 573    write(message,'(A,I0,A)')'option=',option,' while the only allowed values are 0, 1, or 2.'
 574    MSG_BUG(message)
 575  end if
 576 
 577 !firstchar=' ';if (use_gpu_cuda==1) firstchar='_'
 578  cmpw(:)=zero ; cfft(:)=zero ; cfftf(:)=zero ; cadd(:)=zero
 579  dttyp(:)=0
 580 
 581  my_natom=natom;if (mpi_enreg%nproc_atom>1) my_natom=mpi_enreg%my_natom
 582 
 583  call wrtout(std_out,'memory : analysis of memory needs ','COLL')
 584 
 585  if(jdtset>=100)then
 586    write(message,'(80a,a,a,i5,a)')('=',mu=1,80),ch10,&
 587 &   ' Values of the parameters that define the memory need for DATASET',jdtset,'.'
 588  else if(jdtset/=0)then
 589    write(message,'(80a,a,a,i3,a)')('=',mu=1,80),ch10,&
 590 &   ' Values of the parameters that define the memory need for DATASET',jdtset,'.'
 591  else
 592    write(message,'(80a,a,a)')('=',mu=1,80),ch10,&
 593 &   ' Values of the parameters that define the memory need of the present run '
 594  end if
 595  call wrtout(iout,message,'COLL')
 596  call wrtout(std_out,message,'COLL')
 597 
 598  write(message,'( 4(a,i8),a,4(a,i8) )' ) &
 599 & '     intxc =',intxc   ,'    ionmov =',ionmov,&
 600 & '      iscf =',iscf    ,'    lmnmax =',lmnmax,ch10,&
 601 & '     lnmax =',lnmax   ,'     mgfft =',mgfft,&
 602 & '  mpssoang =',mpssoang,'    mqgrid =',mqgrid_vl
 603  call wrtout(iout,message,'COLL')
 604  call wrtout(std_out,message,'COLL')
 605 
 606  write(message,'( 4(a,i8),a,4(a,i8),a,4(a,i8) )' ) &
 607 & '     natom =',natom  ,'  nloc_mem =',nloalg(2)*(nloalg(3)+1),&
 608 & '    nspden =',nspden ,'   nspinor =',nspinor,ch10,&
 609 & '    nsppol =',nsppol ,'      nsym =',nsym,&
 610 & '    n1xccc =',n1xccc ,'    ntypat =',ntypat,ch10,&
 611 & '    occopt =',occopt ,'   xclevel =',xclevel
 612  call wrtout(iout,message,'COLL')
 613  call wrtout(std_out,message,'COLL')
 614 
 615  write(message,'(4(3(a,i12),a))') &
 616 & '-    mband =',mband  ,'        mffmem =',mffmem,&
 617 & '         mkmem =',mkmem  ,ch10,&
 618 & '       mpw =',mpw    ,'          nfft =',nfft ,&
 619 & '          nkpt =',nkpt
 620  call wrtout(iout,message,'COLL')
 621  call wrtout(std_out,message,'COLL')
 622 
 623  if (my_natom/=natom)then
 624    write(message,'(a,i10)') 'Pmy_natom=',my_natom
 625    call wrtout(iout,message,'COLL')
 626    call wrtout(std_out,message,'COLL')
 627  end if
 628 
 629 !Additional information if imgmov is activated (use of replicas of the cell)
 630  if (nimage>1) then
 631    write(message,'(1(a,i10))' ) '  nimage =',nimage
 632    call wrtout(iout,message,'COLL')
 633    call wrtout(std_out,message,'COLL')
 634  end if
 635 
 636 !Additional information on FFT grids if PAW
 637  if (usepaw==1) then
 638    write(message, '(a,a,a,i10,a,i10)' )&
 639 &   ' PAW method is used; the additional fine FFT grid is defined by:',ch10,&
 640 &   '   mgfftf=',mgfftf,'    nfftf =',nfftf
 641    call wrtout(iout,message,'COLL')
 642    call wrtout(std_out,message,'COLL')
 643  end if
 644 
 645 !Additional information if GPU
 646  if (use_gpu_cuda==1) then
 647 !  write(message, '(a)' )' GPU method is used'
 648 !  call wrtout(iout,message,'COLL')
 649 !  call wrtout(std_out,message,'COLL')
 650  end if
 651 
 652 !Additional information needed for the susceptibility and dielectric matrices
 653  if((modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70) .or. iscf==-1)then
 654 
 655 !  Compute the number of bands in blocks (nbnd_in_blk) from mband (see suskmm.f)
 656 !  Consider that if the number of bands is large, there are at most 8 blocks
 657    if(mband>=48)then
 658      mblk=8
 659      nbnd_in_blk=(mband-1)/mblk+1
 660 !    If the number of bands is medium, place 6 bands per block
 661    else if(mband>=12)then
 662      nbnd_in_blk=6
 663 !    Otherwise, must have at least 2 blocks
 664    else
 665      mblk=2
 666      nbnd_in_blk=(mband-1)/mblk+1
 667    end if
 668 
 669    write(message, '(a,a,a,i10,a,i6,a,i10,a,i10)' )&
 670 &   ' For the susceptibility and dielectric matrices, or tddft :',ch10,&
 671 &   '   mgfft =',mgfftdiel,'  nbnd_in_blk=',nbnd_in_blk,'    nfft =',nfftdiel,&
 672 &   '     npw =',npwdiel
 673    call wrtout(iout,message,'COLL')
 674    call wrtout(std_out,message,'COLL')
 675    ndiel4=ngfftdiel(4) ; ndiel5=ngfftdiel(5) ; ndiel6=ngfftdiel(6)
 676    ndiel456=ndiel4*ndiel5*ndiel6
 677  else
 678 !  To be sure of initialisation.
 679    ndiel456 = 1
 680  end if
 681 
 682  write(message,'(80a)') ('=',mu=1,80)
 683  call wrtout(iout,message,'COLL')
 684  call wrtout(std_out,message,'COLL')
 685 
 686  if(getcell>0 .or. (getcell<0 .and. idtset+getcell>0) )then
 687    write(message,'(a,a,a,a,a,a,i3,a,i3,a,a,a,a,a,a)' )ch10,&
 688 &   ' memory : COMMENT -',ch10,&
 689 &   '  The determination of memory needs at this stage is meaningless,',ch10,&
 690 &   '  since getcell = ',getcell,' is non-zero, while idtset=',idtset,'.',ch10,&
 691 &   '  The following numbers are obtained by supposing that acell and rprim',ch10,&
 692 &   '  are NOT taken from a previous dataset. You cannot rely on them.',ch10
 693    call wrtout(iout,message,'COLL')
 694    call wrtout(std_out,message,'COLL')
 695  end if
 696 
 697 !Compute number of atoms per type for current proc
 698  nattyp(:)=0
 699  do ii=1,natom
 700    nattyp(typat(ii))=nattyp(typat(ii))+1
 701  end do
 702 
 703 !PAW: store useful dims
 704  if (usepaw==1) then
 705    ABI_ALLOCATE(basis_size,(npsp))
 706    ABI_ALLOCATE(l_size,(npsp))
 707    ABI_ALLOCATE(lmn_size,(npsp))
 708    ABI_ALLOCATE(lmn2_size,(npsp))
 709    ABI_ALLOCATE(mesh_size,(npsp))
 710    ABI_ALLOCATE(shape_type,(npsp))
 711    ABI_ALLOCATE(pawver,(npsp))
 712    ABI_ALLOCATE(rshp,(npsp))
 713    do ii=1,npsp
 714      basis_size(ii)=pspheads(ii)%pawheader%basis_size
 715      mesh_size(ii)=pspheads(ii)%pawheader%mesh_size
 716      l_size(ii)=pspheads(ii)%pawheader%l_size
 717      lmn_size(ii)=pspheads(ii)%pawheader%lmn_size
 718      lmn2_size(ii)=lmn_size(ii)*(lmn_size(ii)+1)/2
 719      pawver(ii)=pspheads(ii)%pawheader%pawver
 720      rshp(ii)=pspheads(ii)%pawheader%rshp
 721      shape_type(ii)=pspheads(ii)%pawheader%shape_type
 722    end do
 723    l_max=maxval(pspheads(:)%lmax)
 724    l_size_max=maxval(pspheads(:)%pawheader%l_size)
 725    rhoij_nspden=nspden;if (pawspnorb>0) rhoij_nspden=4
 726    ABI_ALLOCATE(my_nattyp,(ntypat))
 727    if ((mpi_enreg%nproc_atom<=1).or.(.not.associated(mpi_enreg%my_atmtab))) then
 728      my_nattyp=nattyp
 729    else
 730      my_nattyp=0
 731      do ii=1,my_natom
 732        jj=typat(mpi_enreg%my_atmtab(ii))
 733        my_nattyp(jj)=my_nattyp(jj)+1
 734      end do
 735    end if
 736  else
 737 !  Do the allocation to avoid uninitialised variables.
 738    ABI_ALLOCATE(my_nattyp,(1))
 739    ABI_ALLOCATE(basis_size,(1))
 740    ABI_ALLOCATE(l_size,(1))
 741    ABI_ALLOCATE(lmn_size,(1))
 742    ABI_ALLOCATE(lmn2_size,(1))
 743    ABI_ALLOCATE(mesh_size,(1))
 744    ABI_ALLOCATE(shape_type,(1))
 745    ABI_ALLOCATE(pawver,(1))
 746    ABI_ALLOCATE(rshp,(1))
 747    rhoij_nspden=nspden
 748    l_size_max=1
 749    l_max=1
 750  end if
 751 
 752  n_fftgr=1;iscf10=mod(iscf,10)
 753  if(iscf10==1)              n_fftgr=5
 754  if(iscf10==2)              n_fftgr=3
 755  if(iscf10==3)              n_fftgr=4
 756  if(iscf10==4)              n_fftgr=6
 757  if(iscf10==5.or.iscf10==6) n_fftgr=10
 758  if(iscf10==7)              n_fftgr=2+2*npulayit
 759 
 760 !work1 and work2 in fourdp : take into account approximately fftalgb
 761  fftalgb=mod(ngfft(7),100)/10
 762  if(fftalgb==0)narr_fourdp=2*2
 763  if(fftalgb==1)narr_fourdp=2
 764 
 765  ngrad=1;if(xclevel==2.or.tfkinfunc>10)ngrad=2
 766 
 767 !(1)                     in main, driver, gstate and brdmin ----------------
 768 !in move, nothing interesting is allocated.
 769 !kg (gstate)
 770  cmpw(1)=3*mkmem               ; dttyp(1)=4
 771 !indsym (gstate)
 772  cadd(3)=4*nsym*natom          ; dttyp(3)=4
 773 !irrzon  (gstate)
 774  if(nsym/=1)then
 775    cfft(4)=2*((nspden/nsppol)-3*(nspden/4))    ; dttyp(4)=4
 776  end if
 777 !ylm (gstate)
 778  cmpw(5)=mkmem*mpsang*mpsang*useylm ; dttyp(5)=8
 779 !
 780 !rhor,rhog (gstate)
 781  cfftf(5)=nspden+2              ; dttyp(5)=8
 782 !cg (gstate)
 783  cmpw(6)=2*nspinor*mband*mkmem*nsppol  ; dttyp(6)=8
 784 !eigen,resid,occ (occ is initialized in abinit, and not in driver)
 785  cadd(7)=3*mband*nkpt*nsppol   ; dttyp(7)=8
 786 !qgrid_vl,qgrid_ff,vlspl,ffspl
 787  cadd(8)=mqgrid_vl*(1+2*ntypat)   &
 788 & +mqgrid_ff*(1+2*ntypat*lnmax)   &
 789 & ; dttyp(8)=8
 790 !ph1d (actually allocated in scfcv !!)
 791  cadd(9)=2*3*(2*mgfft+1)*natom ; dttyp(9)=8
 792  cadd(9)=cadd(9)+2*3*(2*mgfftf+1)*natom*usepaw  !Additional ph1df for PAW
 793 !phnons (in gstate)
 794  if(nsym/=1)then
 795    cfft(10)=2*((nspden/nsppol)-3*(nspden/4))    ; dttyp(10)=8
 796  end if
 797 !xccc1d (in driver)
 798  cadd(11)=n1xccc*6*ntypat      ; dttyp(11)=8
 799 
 800 !hessin in brdmin
 801  if(ionmov==2)then
 802    cadd(15)=3*natom*3*natom      ; dttyp(15)=8
 803  end if
 804 
 805 !Additional PAW arrays
 806 !PAW datasets (pawtab)
 807  if (usepaw==1) then
 808    dttyp(16)=8 ; dttyp(17)=4
 809    do ii=1,npsp
 810      cadd(16)=cadd(16)+2*mesh_size(ii)*basis_size(ii)   !phi,tphi
 811      cadd(16)=cadd(16)+2*mesh_size(ii)*basis_size(ii)&  !phiphj,tphiphj
 812 &    *(basis_size(ii)+1)/2
 813      cadd(16)=cadd(16)+mesh_size(ii)*l_size(ii)         !shapefunc
 814      cadd(16)=cadd(16)+lmn2_size(ii)*l_size(ii)**2      !qijl
 815      cadd(16)=cadd(16)+l_size(ii)*5                     !gnorm,shape_a,shape_q
 816      cadd(16)=cadd(16)+lmn2_size(ii)*(4+lmn2_size(ii))  !eijkl,dltij,dij0,rhoij0,sij
 817      cadd(17)=cadd(17)+lmn2_size(ii)*8                  !indklmn
 818      cadd(16)=cadd(16)+mesh_size(ii)*5                  !coreden,tcoreden,rad,radfact,simfact
 819      if (shape_type(ii)==-1) cadd(16)=cadd(16)+4*mesh_size(ii)*l_size(ii)  !dshpfunc
 820      cadd(16)=cadd(16)+mqgrid_vl*2                      !tncorespl
 821      if (pawver(ii)>=4) cadd(16)=cadd(16)+mqgrid_vl*2   !tnvalespl
 822    end do
 823 !  additional arrays
 824    cadd(16)=cadd(16)+l_size_max*2*l_max*nsym                 !zarot
 825    cadd(16)=cadd(16)+(2*l_max-1)**2*l_max**2*(l_max**2+1)/2  !realgnt
 826    cadd(17)=cadd(17)+nfft+nfftf                              ! fintocoa,coatofin
 827    do ii=1,ntypat
 828      cadd(16)=cadd(16)+my_nattyp(ii)*lmn2_size(ii)*rhoij_nspden*pawcpxocc ! Rhoij and related data
 829      cadd(17)=cadd(17)+my_nattyp(ii)*(2+lmn2_size(ii))    ! (rhoijselect, ...)
 830    end do
 831  end if
 832 
 833 !SCF history (if selected)
 834  if (abs(densfor_pred)==5.or.abs(densfor_pred)==6) then          ! scf_history...
 835    histsz=2
 836    cfftf(18)=nspden*(histsz+1)+1      ; dttyp(18)=8  ! %deltarhor, %atmrho_last, %rhor_last
 837    cadd(19)=3*natom*2*histsz          ; dttyp(19)=8  ! %xreddiff,xred_last
 838    dttyp(20)=4
 839    if (usepaw==1) then
 840      do ii=1,ntypat
 841        cadd(19)=cadd(19)+histsz*2*my_nattyp(ii)*lmn2_size(ii)*rhoij_nspden*pawcpxocc  ! %pawrhoij()%rhoijp
 842        cadd(20)=cadd(20)+histsz*2*my_nattyp(ii)*(2+lmn2_size(ii))*nspden              ! %pawrhoij()%rhoijselect
 843      end do
 844    end if
 845    if (extrapwf>0) then
 846      cadd(19)=cadd(19)+histsz*2*nspinor*mband*mkmem*nsppol  ; dttyp(19)=8  ! %cg
 847    end if
 848  end if
 849 
 850 !(2)                     in scfcv----------------------------------------
 851 
 852 !vhartr,vpsp,vtrial,vxc
 853  cfftf(21)=2+2*nspden           ; dttyp(21)=8
 854 !kxc
 855  if (abs(densfor_pred)>0.and.iscf>=10) then
 856    cfftf(21)=cfftf(21)+3*nspden
 857    if (densfor_pred<0.and.xclevel==2) cfftf(21)=cfftf(21)+20*nspden
 858  end if
 859  if(iscf>0)then
 860 !  f_fftgr
 861    if (pawmixdg==1) then
 862      cfftf(22)=nspden*n_fftgr*mffmem; dttyp(22)=8
 863    else
 864      cfft(22)=nspden*n_fftgr*mffmem; dttyp(22)=8
 865    end if
 866  end if
 867  if( iscf>0 .and. (modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70))then
 868 !  dielinv, susmat
 869    cadd(23)=4*(npwdiel*min(nspden,2))**2; dttyp(23)=8
 870  end if
 871 !Kernel of Poisson's solver
 872  if (icoulomb == 1) then
 873    cadd(24) = ngfft(4)*ngfft(5)*ngfft(6) ; dttyp(24) = 8
 874  end if
 875  if( (iscf>0 .and. modulo(iprcel,100)>=20 .and. modulo(iprcel,100)<70) .or. iscf==-1 )then
 876 !  kg_diel
 877    cadd(27)=3*npwdiel             ; dttyp(27)=4
 878    if(nsym/=1)then
 879 !    irrzondiel
 880      cadd(27)=cadd(27)+2*nfftdiel*(nspden/nsppol)
 881 !    phnonsdiel
 882      cadd(28)=2*nfftdiel*(nspden/nsppol)   ; dttyp(28)=8
 883    end if
 884  end if
 885  if(n1xccc/=0)then
 886 !  xccc3d
 887    cfftf(29)=1                    ; dttyp(29)=8
 888  end if
 889 
 890 !Additional PAW arrays
 891  dttyp(25)=8 ; dttyp(26)=4
 892  if (usepaw==1) then
 893    do ii=1,ntypat
 894      jj=(1+int(nfftf*four_pi*rshp(ii)**3/(three*ucvol)))        ! pawfgrtab
 895      cadd(26)=cadd(26)+my_nattyp(ii)*jj                         !   %ifftsph
 896      cadd(25)=cadd(25)+my_nattyp(ii)*jj*(1-pawstgylm)*3         !   %rfgd (if pawstgylm=0)
 897      cadd(25)=cadd(25)+my_nattyp(ii)*jj*pawstgylm*l_size(ii)**2 !   %gylm (if pawstgylm=1)
 898      if (optforces==1) cadd(25)=cadd(25)+my_nattyp(ii)*jj&      !   %gylmgr,%rfgd (if pawstgylm=1)
 899 &    *pawstgylm*(3*l_size(ii)**2+3*optstress)
 900      cadd(26)=cadd(26)+my_nattyp(ii)*l_size(ii)**2/32           ! lmselect  !now a boolean
 901      cadd(25)=cadd(25)+my_nattyp(ii)*lmn2_size(ii)*nspinor**3   ! dij
 902      if (iscf>0) then
 903        cadd(25)=cadd(25)+my_nattyp(ii)*lmn2_size(ii)*rhoij_nspden*pawcpxocc                ! rhoijres
 904        cadd(25)=cadd(25)+my_nattyp(ii)*lmn2_size(ii)*rhoij_nspden*pawcpxocc*n_fftgr*mffmem ! f_paw
 905      end if
 906    end do
 907    cadd(25)=cadd(25)+(1+3*pawnhatxc*(ngrad/2))*nspden*nfftf       !nhat,nhatgr
 908  end if
 909 
 910 !(3)                     in rhotoxc, xcden -------------------------------
 911 
 912  if(xclevel/=0)then
 913    if(n1xccc/=0)then
 914 !    rhocorval
 915      cfftf(31)=nspden               ; dttyp(31)=8
 916    end if
 917 !  dnexcdn, rhonow
 918    nspgrad=nspden*ngrad
 919    if(nspden==2 .and. ngrad==2)nspgrad=5
 920    cfftf(32)=nspden*ngrad*ngrad+nspgrad  ; dttyp(32)=8
 921    if(intxc==1 .or. ngrad==2)then
 922 !    wkcmpx,work in xcden +work1,work2 in fourdp
 923      cfftf(33)=3+narr_fourdp        ; dttyp(33)=8
 924      cadd(33)=narr_fourdp*(ngfftf(4)*ngfftf(5)*ngfftf(6)-nfftf)
 925    end if
 926    if(ngrad==2)then
 927 !    workgr in xcden
 928      cfftf(34)=2                    ; dttyp(34)=8
 929    end if
 930  end if
 931  if(iscf>0)then
 932 !  In this case, rhotoxc is called from rhotov also,
 933 !  for which vresid was allocated in scfcv
 934 !  vresid
 935    cfftf(35)=nspden               ; dttyp(35)=8
 936  end if
 937 !Poisson's solver with zero padding
 938  if (icoulomb == 1) then
 939    cfft(36) = 8                   ; dttyp(36) = 8
 940    cadd(36) = ngfft(4) * ngfft(5) * ngfft(6) - nfft
 941  end if
 942 
 943 !Note : in hartre, called by rhotoxc, one uses
 944 !2 dp arrays of total size 3*nfft,
 945 !and 2 arrays of total size 4*n4*n5*n6 for fourdp
 946 !This will be smaller than the total use for symrhg
 947 
 948 !(4)                     in newvtr/newrho --------------------------------------
 949 
 950  if(iscf>0)then
 951 !  vresid (allocated in scfcv) and vrespc
 952    if (pawmixdg==1) then
 953      cfftf(41)=2*nspden             ; dttyp(41)=8
 954    else
 955      cfft(41)=2*nspden             ; dttyp(41)=8
 956    end if
 957    if(mffmem==0)then
 958 !    f_fftgr_disk
 959      if (pawmixdg==1) then
 960        cfftf(42)=nspden*n_fftgr       ; dttyp(42)=8
 961      else
 962        cfft(42)=nspden*n_fftgr       ; dttyp(42)=8
 963      end if
 964 !    f_paw_disk
 965      if (usepaw==1) then
 966        dttyp(43)=8
 967        do ii=1,ntypat
 968          cadd(43)=cadd(43)+my_nattyp(ii)*lmn2_size(ii)*nspden*n_fftgr
 969        end do
 970      end if
 971    end if
 972 !  rhoupdn, n(v)resid0, vtrialg, rhog2, magng
 973    if (pawmixdg==1) then
 974      cfftf(43)=2*nspden       ; dttyp(43)=8
 975    else
 976      cfft(43)=2*nspden       ; dttyp(43)=8
 977      if (nspden>1) cfftf(43)=2*(nspden-1)
 978    end if
 979  end if
 980 
 981 !(5-6)                   in vtorho-----------------------------------------
 982 
 983 !Note : (5) is for the arrays inside the spin and k-point loop
 984 !they belong to the main chain
 985 !(6) is for the arrays after the spin and k-point loop
 986 !(6a) is for the arrays after that loop, for the parallel k-point chain
 987 !(6b) is for the arrays in mkrho, for the mkrho chain
 988 !(6c) is for the arrays in symrhg, for the fourdp chain
 989 !(6d) is for the arrays in suscep, for the suscep chain, see (10)
 990 !(6e) is for the arrays in dielmt, for the dielmt chain, see (11)
 991 !(6f) is for the arrays in pawmkrhoij
 992 
 993 !eknlk, enlnk, grnlnk
 994  cadd(51)=(11+3*natom)*mband*nkpt*nsppol &
 995 & ; dttyp(51)=8
 996 !kg_k
 997  cmpw(52)=3                    ; dttyp(52)=4
 998 !rhoaug,vlocal
 999  cfft(53)=2                    ; dttyp(53)=8
1000  cadd(53)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1001 !rhowfr,rhowfg
1002  cfft(53)=cfft(53)+2+nspden
1003  if(mkmem==0)then
1004 !  cg_disk
1005    cmpw(54)=2*nspinor*mband      ; dttyp(54)=8
1006  end if
1007 !eig_k, ek_k, enl_k, grnl_k, occ_k, resid_k
1008  cadd(56)=(14+3*natom)*mband   ; dttyp(56)=8
1009 !ylm_k
1010  cmpw(57)=mpsang*mpsang*useylm ; dttyp(57)=8
1011 !PAW:cprj
1012  if (usepaw==1) then
1013    dttyp(58)=8
1014    do ii=1,ntypat
1015      cadd(58)=cadd(58)+2*nattyp(ii)*nkpt*nspinor*mband*nsppol*lmn_size(ii)/max(mpi_enreg%nproc_band,1)
1016    end do
1017  end if
1018 
1019 !(6)                     in vtorho----------------------------------------
1020 
1021 !doccde
1022  cadd(60)=mband*nkpt*nsppol    ; dttyp(60)=8
1023 
1024 !(6a)                    in vtorho----------------------------------------
1025  if(xmpi_paral==1)then
1026 !  Parallel case
1027 !  buffer1
1028 !  buffer2
1029    if(occopt>=3 .and. occopt <=8) then
1030      dttyp(61)=8
1031      if(nsppol*nfft >= (13+3*natom)*mband*nkpt*nspden)then
1032        cfft(61)=2*nspden
1033      else
1034        cadd(61)=(13+3*natom)*mband*nkpt*nspden
1035      end if
1036    else
1037      cfft(61)=2*nspden             ; dttyp(61)=8
1038      cadd(61)=9+3*natom+2+2*mband*nkpt*nspden
1039    end if
1040  end if
1041 
1042 
1043 !(6b)                    in mkrho, called by vtorho--------------------------
1044  if(occopt>=3 .and. occopt <=8)then
1045    if(mkmem==0)then
1046 !    cg_disk
1047      cmpw(62)=2*nspinor*mband      ; dttyp(62)=8
1048    end if
1049 !  cwavef
1050    cmpw(65)=2*nspinor            ; dttyp(65)=8
1051 
1052 !  rhoaug, wfraug, work1 in fourwf
1053    cfft(66)=5                    ; dttyp(66)=8
1054    cadd(66)=5*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1055  end if
1056 
1057 !(6c)                    in symrhg, called by vtorho--------------------------
1058  if(iscf>0)then
1059    cfft(67)=narr_fourdp          ; dttyp(67)=8
1060    cadd(67)=narr_fourdp*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1061    if(nsym>1)then
1062 !    work1  in symrhg
1063      cfft(68)=2                    ; dttyp(68)=8
1064      cadd(68)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1065    end if
1066  end if
1067 
1068 
1069 !(6d) and (6e)           in suscep and dielmt, called by vtorho,
1070 !see (10) and (11) -------------------------------
1071 
1072 !(6f)  in pawmkrhoij or symrhoij called by pawmkrho, called by vtorho--------
1073 !only when paralellim over atoms is activated
1074  dttyp(63)=8
1075  if((usepaw==1) .and. ((iscf>0) .or. (iscf == -3) .and. mpi_enreg%nproc_atom>1 ))then
1076    do ii=1,ntypat
1077      cadd(63)=cadd(63)+nattyp(ii)*lmn2_size(ii)*rhoij_nspden*pawcpxocc   ! Rhoij_gather and related data
1078      cadd(63)=cadd(63)+nattyp(ii)*(2+lmn2_size(ii))    ! Rhoij_gather (rhoijselect, ...)
1079    end do
1080  end if
1081 
1082 !(7)                     in vtowfk----------------------------------------
1083 
1084 !evec
1085  cadd(71)=2*mband*mband        ; dttyp(71)=8
1086 !subham, subvnl(if not PAW)
1087  cadd(72)=(1+usepaw)*mband*(mband+1)    ; dttyp(72)=8
1088 !gkpsq
1089  cmpw(73)=1                    ; dttyp(73)=8
1090 !ffnl
1091  cmpw(74)=2*ntypat*lmnmax      ; dttyp(74)=8
1092 !ph3d
1093  matblk=min(NLO_MINCAT,maxval(nattyp))
1094  if(nloalg(2)<=0)matblk=natom
1095  cmpw(75)=2*matblk             ; dttyp(75)=8
1096 !gsc(if PAW)
1097  cmpw(76)=2*mband*nspinor*usepaw          ; dttyp(76)=8
1098 !Note : matvnl and mat1 do not belong to a chain defined until now
1099 !
1100  if(occopt<3 .and. iscf>0)then
1101 !  cwavef
1102    cmpw(77)=2*nspinor            ; dttyp(77)=8
1103 !  wfraug
1104    cfft(78)=2                    ; dttyp(78)=8
1105    cadd(78)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1106 !  work1 in fourwf
1107    cfft(79)=2                    ; dttyp(79)=8
1108    cadd(79)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1109  end if
1110 
1111 !(8)                     in cgwf------------------------------------------
1112 
1113 !conjgr, cwavef, direc, gh_direc, gvnl_direc
1114  cmpw(81)=2*5*nspinor          ; dttyp(81)=8
1115 !ghc,gvnlc
1116  cmpw(82)=2*2*nspinor          ; dttyp(82)=8
1117 !PAW: scwavef,direc_tmp,ghc_all
1118  cmpw(83)=2*(2+mband)*nspinor*usepaw  ; dttyp(83)=8
1119 
1120 
1121 !(9a)                    in getghc and fourwf----------------------------
1122 
1123 !work (in getghc)
1124  cfft(91)=2                    ; dttyp(91)=8
1125  cadd(92)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1126 !work1 (in fourwf)
1127  cfft(92)=2                    ; dttyp(92)=8
1128  cadd(92)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1129 
1130 !(9b)                    in getghc, nonlop and opernl--------------------
1131  mincat=min(NLO_MINCAT,natom-ntypat+1)
1132  if (useylm==0) then                          ! ===== nonlop_pl
1133 !  gxa  (in nonlop)
1134    cadd(94)=2*20*mincat*2       ; dttyp(94)=8
1135 !  dgxdt  (in nonlop)            !MT20072002: not allocated in getghc !!
1136    if (optforces==1) then
1137      cadd(95)=2*3*20*mincat*2    ; dttyp(95)=8
1138    end if
1139 !  teffv (in opernl4 - no distinction is made for opernl, opernl2 or opernl3)
1140 !  kpgx, ffkg
1141 !  here, evaluate an upper value, with nproj=2, p,d and f orbitals, but not
1142 !  considering the stress, since it will be called outside of the main chain
1143    cadd(97)=NLO_MBLKPW*40        ; dttyp(97)=8
1144 !  kpg if nloalg(3)=1
1145    cadd(98)=3*mpw*nloalg(3)      ; dttyp(98)=8
1146  else                                        ! ===== nonlop_ylm
1147 !  gx + gxfac + gxfac_sij
1148    cadd(94)=2*lmnmax*mincat*(mpw+1+usepaw)    ; dttyp(94)=8
1149 !  kpg
1150    cadd(95)=3*mpw       ; dttyp(95)=8
1151 !  indlmn_typ, ffnl_typ
1152    cadd(96)=lmnmax*6; dttyp(96)=4
1153 !  ffnl_typ
1154    cadd(97)=lmnmax*mpw; dttyp(97)=8
1155 !  opernla_ylm: scalar,scali
1156    cadd(98)=2*mpw; dttyp(98)=8
1157  end if
1158 
1159 !(10)                    in suscep and suskmm ----------------------------
1160 
1161  if(modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70)then
1162 !  Variables allocated in suscep
1163    if(mkmem==0)then
1164 !    cg_disk
1165      cmpw(101)=2*mband             ; dttyp(101)=8
1166    end if
1167    if(occopt>=3)then
1168 !    drhode
1169      cadd(103)=2*npwdiel*nsppol    ; dttyp(103)=8
1170    end if
1171 !  rhoextrap (always included, although it appears only when extrap==1)
1172    cadd(104)=ndiel456            ; dttyp(104)=8
1173 
1174 !  Variables allocated in suskmm
1175 !  cwavef
1176    cmpw(106)=2                   ; dttyp(106)=8
1177 !  rhoaug, wfraug
1178    cadd(107)=3*ndiel456          ; dttyp(107)=8
1179 !  wfprod
1180    cadd(108)=2*npwdiel           ; dttyp(108)=8
1181 !  wfrspa1, wfrspa2
1182    cadd(109)=4*ndiel456*nbnd_in_blk ; dttyp(109)=8
1183 
1184  end if
1185 
1186 !(11)                    in dielmt ---------------------------------------
1187 
1188  if(modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70)then
1189 !  dielh,dielvec,eig_diel,zhpev1,zhpev2
1190    cadd(111)=3*npwdiel*npwdiel                   &
1191 &   +9*npwdiel           ; dttyp(111)=8
1192  end if
1193 
1194 !(12)                    in tddft  ---------------------------------------
1195 
1196  if(iscf==-1)then
1197    if(mkmem/=0)then
1198 !    cg_disk
1199      cmpw(121)=2*mband            ; dttyp(121)=8
1200    end if
1201 !  cwavef
1202    cmpw(124)=2*mband             ; dttyp(124)=8
1203 !  rhoaug,wfraug,wfrspa
1204    cadd(125)=(2+mband)*ndiel456  ; dttyp(125)=8
1205  end if
1206 
1207 !--------------------------------------------------------------------------
1208 
1209  chain(:,:)=.true.
1210 
1211 !Define the main chain version a (fourwf)
1212  chain(31:50,1)=.false.
1213  chain(60:70,1)=.false.
1214  chain(77:80,1)=.false.
1215  chain(93:100,1)=.false.
1216  chain(101:marrays,1)=.false.
1217 
1218 !Define the main chain version b (nonlop+opernl)
1219  chain(31:50,2)=.false.
1220  chain(60:70,2)=.false.
1221  chain(77:80,2)=.false.
1222  chain(91:92,2)=.false.
1223  chain(101:marrays,2)=.false.
1224 
1225 !Define the XC chain ( 31:40 belong only to this chain)
1226  chain(41:marrays,3)=.false.
1227 
1228 !Define the mkrho chain ( 62:66 and 76:77 belong only to this chain)
1229 !is it sure that they have to be summed ?)
1230  chain(31:50,4)=.false.
1231  chain(51:59,4)=.false.
1232  chain(61   ,4)=.false.
1233  chain(67:70,4)=.false.
1234  chain(71:marrays,4)=.false.
1235  chain(77:80,4)=.true.
1236 
1237 !Define the fourdp chain ( 67:70 belong only to this chain)
1238  chain(31:50,5)=.false.
1239  chain(51:66,5)=.false.
1240  chain(60   ,5)=.true.
1241  chain(71:marrays,5)=.false.
1242 
1243 !Define the parallel k-point chain ( 61 belong only to this chain )
1244  chain(31:50,6)=.false.
1245  chain(51:59,6)=.false.
1246  chain(62:70,6)=.false.
1247  chain(71:marrays,6)=.false.
1248 
1249 !Define the newvtr chain ( 41:50 belong only to this chain)
1250  chain(31:40,7)=.false.
1251  chain(51:marrays,7)=.false.
1252 
1253 !Define the suscep chain ( 101:110 belong only to this chain)
1254  chain(31:marrays,8)=.false.
1255  chain(60    ,8)=.true.
1256  chain(101:110,8)=.true.
1257 
1258 !Define the dielmt chain ( 111:120 belong only to this chain)
1259  chain(31:marrays,9)=.false.
1260  chain(60    ,9)=.true.
1261  chain(111:120,9)=.true.
1262 
1263 !Define the tddft chain ( 121:130 belong only to this chain)
1264  chain(31:marrays,10)=.false.
1265  chain(60    ,10)=.true.
1266  chain(121:130,10)=.true.
1267 
1268 !The memory needed for each chain has been computed
1269 !-------------------------------------------------------------------------
1270 !Still need some auxiliary data : estimate the disk space
1271 !or the maximum segment size.
1272 
1273 !XG030513 : MPIWF need to multiply mbdiskwf by the number of processors
1274 !in the WF group. For the time being, nprocwf=1
1275  nprocwf=mpi_enreg%nproc_fft
1276 
1277  mbdiskwf=(8*two*mpw*nprocwf*sum(nband(1:nkpt*nsppol)))/1024._dp**2 + 0.002_dp
1278  mbdiskpd=(8*nfftf*nsppol)/1024._dp**2 + 0.002_dp
1279 
1280 !Determine the largest array out of cg (cg_disk), f_fftgr (f_fftgr_disk), or pawfgrtab%gylm
1281  if(mkmem==0)then
1282    mbcg=(8*2*mpw*mband)/1024._dp**2 + 0.002_dp
1283  else
1284    mbcg=(8*2*mpw*mband*mkmem*nsppol)/1024._dp**2 + 0.002_dp
1285  end if
1286  if(mffmem==0)then
1287    if (pawmixdg==1) then
1288      mbf_fftgr=(8*nfftf*n_fftgr)/1024._dp**2 + 0.002_dp
1289    else
1290      mbf_fftgr=(8*nfft*n_fftgr)/1024._dp**2 + 0.002_dp
1291    end if
1292  else
1293    if (pawmixdg==1) then
1294      mbf_fftgr=(8*nfftf*n_fftgr*nsppol*mffmem)/1024._dp**2 + 0.002_dp
1295    else
1296      mbf_fftgr=(8*nfft*n_fftgr*nsppol*mffmem)/1024._dp**2 + 0.002_dp
1297    end if
1298  end if
1299  if(usepaw==1)then
1300    mbgylm=0
1301    do ii=1,ntypat                                        ! pawfgrtab
1302      jj=(1+int(nfftf*four_pi/(three*ucvol)*rshp(ii)**3))
1303      mbgylm=mbgylm+my_nattyp(ii)*jj &
1304 &     *( l_size(ii)**2*pawstgylm &                              !   %gylm   (if pawstgylm=1)
1305 &    +3*max((optforces+1)/2,optstress)*l_size(ii)**2*pawstgylm& !   %gylmgr (if pawstgylm=1)
1306 &    +3*optstress*pawstgylm&                                    !   %rfgd   (if pawstgylm=1)
1307 &    +3*(1-pawstgylm) )                                         !   %rfgd   (if pawstgylm=0)
1308    end do
1309    mbgylm=8*mbgylm/1024._dp**2 + 0.002_dp
1310  else
1311    mbgylm=0
1312  end if
1313 
1314 !-------------------------------------------------------------------------
1315  ABI_DEALLOCATE(my_nattyp)
1316  ABI_DEALLOCATE(basis_size)
1317  ABI_DEALLOCATE(l_size)
1318  ABI_DEALLOCATE(lmn_size)
1319  ABI_DEALLOCATE(lmn2_size)
1320  ABI_DEALLOCATE(mesh_size)
1321  ABI_DEALLOCATE(pawver)
1322  ABI_DEALLOCATE(shape_type)
1323  ABI_DEALLOCATE(rshp)
1324 
1325 !---------------------------------------------------------------------
1326 !Now, analyze the data
1327 
1328  call memana(cadd,cfft,cfftf,chain,cmpw,dttyp,iout,iprcel,iscf,&
1329 & marrays,mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm,mffmem,&
1330 & mpw,natom,nchain,nfft,nfftf,occopt,option,prtvol)
1331 
1332 end subroutine memory

m_memeval/memory_eval [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

 memory_eval

FUNCTION

 Big loop on the datasets:
 - for each of the datasets, write one line about the crystallographic data
 - compute the memory needs for this data set.

INPUTS

  dtsets(0:ndtset_alloc)=<type dataset_type>contains all input variables
  iout=unit number of output file
  mpi_enregs(0:ndtset_alloc)=<type MPI_type>information about MPI parallelization
  ndtset= number of datasets to be read; if 0, no multi-dataset mode
  ndtset_alloc=number of datasets, corrected for allocation of at least
      one data set.
  npsp=number of pseudopotentials
  pspheads(npsp)=<type pspheader_type>all the important information from the
   pseudopotential file header, as well as the psp file name

OUTPUT

   printing only (note: dtsets(idtset)%mband is also set to the maximum of nband)

PARENTS

      abinit

CHILDREN

      abi_io_redirect,getdim_nloc,getmpw,getng,libpaw_write_comm_set
      littlegroup_q,mati3inv,memorf,memory,metric,mkrdim,prtspgroup,setmqgrid
      wvl_memory

SOURCE

 87 subroutine memory_eval(dtsets,iout,mpi_enregs,ndtset,ndtset_alloc,npsp,pspheads)
 88 
 89 
 90 !This section has been created automatically by the script Abilint (TD).
 91 !Do not modify the following lines by hand.
 92 #undef ABI_FUNC
 93 #define ABI_FUNC 'memory_eval'
 94 !End of the abilint section
 95 
 96  implicit none
 97 
 98 !Arguments ------------------------------------
 99 !scalars
100  integer,intent(in) :: iout,ndtset,ndtset_alloc,npsp
101  type(MPI_type),intent(inout) :: mpi_enregs(0:ndtset_alloc)
102 !arrays
103  type(dataset_type),intent(inout) :: dtsets(0:ndtset_alloc)
104  type(pspheader_type),intent(in) :: pspheads(npsp)
105 
106 !Local variables -------------------------------
107 !scalars
108  integer :: cplex,exchn2n3d,extrapwf,getcell,idtset,ii,intxc,densfor_pred,iprcel
109  integer :: iscf,isym,jdtset,lmnmax,mem_test
110  integer :: lmnmax_eff,lmnmaxso,lnmax,lnmax_eff,lnmaxso,mband
111  integer :: me_fft,mffmem,mgfftdiel,mgfftf,mkmem,mpsang,mpspso
112  integer :: mpssoang,mpw,mqgrid,mqgriddg,mqgrid_ff,mqgrid_vl,n1xccc,natom
113  integer :: nfftdiel,nfftf,nkpt,nproc_fft,nptsgvec,npulayit,npwdiel,nspden,nspinor
114  integer :: nsppol,nsym,ntypat,occopt,optddk,optforces,optphon,optstress
115  integer :: optstrs,paral_fft,pawcpxocc,pawmixdg,pawnhatxc,pawspnorb,pawstgylm,prtvol,ptgroupma,response
116  integer :: spgroup,timrev,usepaw,useylm,use_gpu_cuda,xclevel
117  real(dp) :: diecut,dilatmx,ecut,ecut_eff,ecutdg_eff,ecutsus,ucvol
118 !arrays
119  integer :: bravais(11),mkmems(3),ngfftdiel(18)
120  integer :: ngfftf(18),nloalg(3)
121  integer,allocatable :: nband(:),symq(:,:,:),symrec(:,:,:),symrel(:,:,:)
122  real(dp),parameter :: k0(3)=(/zero,zero,zero/)
123  real(dp) :: genafm(3),gmet(3,3),gprimd(3,3),kpt_diel(3),qphon(3),rmet(3,3),rprimd(3,3)
124 
125 !*************************************************************************
126 
127  do idtset=1,ndtset_alloc
128    if(mpi_enregs(idtset)%me<0) cycle
129    call abi_io_redirect(new_io_comm=mpi_enregs(idtset)%comm_world)
130    call libpaw_write_comm_set(mpi_enregs(idtset)%comm_world)
131 
132 !  Initialisations
133    bravais(:)=dtsets(idtset)%bravais(:)
134    exchn2n3d=dtsets(idtset)%exchn2n3d
135    extrapwf=dtsets(idtset)%extrapwf
136    genafm(:) =dtsets(idtset)%genafm(:)
137    getcell=dtsets(idtset)%getcell
138    intxc=dtsets(idtset)%intxc
139    densfor_pred=dtsets(idtset)%densfor_pred
140    iprcel=dtsets(idtset)%iprcel
141    iscf=dtsets(idtset)%iscf
142    jdtset=dtsets(idtset)%jdtset ; if(ndtset==0)jdtset=0
143    me_fft=mpi_enregs(idtset)%me_fft
144    mffmem=dtsets(idtset)%mffmem
145    mpw=dtsets(idtset)%mpw
146    mqgrid=dtsets(idtset)%mqgrid
147    mqgriddg=dtsets(idtset)%mqgriddg
148    natom=dtsets(idtset)%natom
149    nkpt  =dtsets(idtset)%nkpt
150    nloalg(:)=dtsets(idtset)%nloalg(:)
151    nproc_fft=mpi_enregs(idtset)%nproc_fft
152    npulayit=dtsets(idtset)%npulayit
153    nspden=dtsets(idtset)%nspden
154    nspinor=dtsets(idtset)%nspinor
155    nsppol=dtsets(idtset)%nsppol
156    nsym     =dtsets(idtset)%nsym
157    ntypat=dtsets(idtset)%ntypat
158    occopt=dtsets(idtset)%occopt
159    optforces=dtsets(idtset)%optforces
160    paral_fft=mpi_enregs(idtset)%paral_kgb
161    pawcpxocc=dtsets(idtset)%pawcpxocc
162    pawmixdg=dtsets(idtset)%pawmixdg
163    pawnhatxc=dtsets(idtset)%pawnhatxc
164    pawspnorb=dtsets(idtset)%pawspnorb
165    pawstgylm=dtsets(idtset)%pawstgylm
166    prtvol=dtsets(idtset)%prtvol
167    ptgroupma =dtsets(idtset)%ptgroupma
168    qphon(:)=dtsets(idtset)%qptn(:)
169    spgroup   =dtsets(idtset)%spgroup
170    usepaw=dtsets(idtset)%usepaw
171    useylm=dtsets(idtset)%useylm
172    use_gpu_cuda=dtsets(idtset)%use_gpu_cuda
173    xclevel=dtsets(idtset)%xclevel
174 
175    ABI_ALLOCATE(symrel,(3,3,nsym))
176    symrel(:,:,1:nsym)=dtsets(idtset)%symrel(:,:,1:nsym)
177 
178 !  Space group output
179    call prtspgroup(bravais,genafm,iout,jdtset,ptgroupma,spgroup)
180 
181    if (dtsets(idtset)%toldff>tol16.and.optforces==0) optforces=1
182    if (dtsets(idtset)%tolrff>tol16.and.optforces==0) optforces=1
183    if (dtsets(idtset)%ionmov>tol16.and.optforces==0) optforces=1
184    if (dtsets(idtset)%imgmov>tol16.and.optforces==0) optforces=1
185    optstress=dtsets(idtset)%optstress
186    optddk=0;optphon=0;optstrs=0
187    if (dtsets(idtset)%rfddk>0.or.dtsets(idtset)%rf2_dkdk>0.or.dtsets(idtset)%rf2_dkde>0) optddk=1
188    if (dtsets(idtset)%rfelfd>0.or.dtsets(idtset)%d3e_pert1_elfd>0.or.&
189 &   dtsets(idtset)%d3e_pert2_elfd>0.or.dtsets(idtset)%d3e_pert3_elfd>0) optddk=1
190    if (dtsets(idtset)%rfphon>0.or.dtsets(idtset)%d3e_pert1_phon>0.or.&
191 &   dtsets(idtset)%d3e_pert2_phon>0.or.dtsets(idtset)%d3e_pert3_phon>0) optphon=1
192    if (dtsets(idtset)%rfstrs>0) optstrs=1
193 
194    ABI_ALLOCATE(nband,(nkpt*nsppol))
195    nband(1:nkpt*nsppol)=dtsets(idtset)%nband(1:nkpt*nsppol)
196    mband=maxval(nband(1:nkpt*nsppol))
197    dtsets(idtset)%mband=mband
198 
199 !  mpsang=max(maxval(pspheads(1:npsp)%lmax)+1,1) ! Likely problems with the HP compiler
200 !  n1xccc=maxval(pspheads(1:npsp)%xccc)
201    mpsang=1
202    n1xccc=pspheads(1)%xccc
203    do ii=1,npsp
204      mpsang=max(pspheads(ii)%lmax+1,mpsang)
205      n1xccc=max(pspheads(ii)%xccc,n1xccc)
206    end do
207 
208 !  Determine the maximum number of projectors, for the set of pseudo atom
209    call getdim_nloc(lmnmax,lmnmaxso,lnmax,lnmaxso,dtsets(idtset)%mixalch_orig,&
210 &   dtsets(idtset)%nimage,npsp,dtsets(idtset)%npspalch,ntypat,dtsets(idtset)%ntypalch,pspheads)
211 
212 !  Treatment of the effect of using a spin-orbit part
213 !  Warning : mpspso is different for each dataset; not relevant for PAW
214    mpspso=1
215    if (dtsets(idtset)%usepaw==0) then
216      do ii=1,npsp
217        if(nspinor/=1)then
218          if(pspheads(ii)%pspso/=0)then
219            if(dtsets(idtset)%so_psp(ii)/=0)then
220              mpspso=2
221            end if
222          end if
223        end if
224      end do
225    end if
226 !  In case of no spin-orbit
227    if(mpspso==1)then
228      mpssoang=mpsang ; lmnmax_eff =lmnmax; lnmax_eff =lnmax
229    else ! spin-orbit will be used
230      mpssoang=2*mpsang-1 ; lmnmax_eff =lmnmaxso ; lnmax_eff =lnmaxso
231    end if
232 !  lmnmax is not used if the Ylm are not used
233    if (useylm==0) lmnmax_eff =lnmax_eff
234 
235    ecut     =dtsets(idtset)%ecut
236    dilatmx  =dtsets(idtset)%dilatmx
237    ecut_eff=ecut*dilatmx**2
238    ecutdg_eff=dtsets(idtset)%pawecutdg*dtsets(idtset)%dilatmx**2
239 
240 !  Compute mgfft,mpw,nfft for this data set
241    call mkrdim(dtsets(idtset)%acell_orig(1:3,1),dtsets(idtset)%rprim_orig(1:3,1:3,1),rprimd)
242    call metric(gmet,gprimd,-1,rmet,rprimd,ucvol)
243 
244    if (usepaw==0) then
245      mgfftf=dtsets(idtset)%mgfft;nfftf=dtsets(idtset)%nfft;ngfftf(:)=dtsets(idtset)%ngfft(:)
246    else
247      mgfftf=dtsets(idtset)%mgfftdg;nfftf=dtsets(idtset)%nfftdg;ngfftf(:)=dtsets(idtset)%ngfftdg(:)
248    end if
249    response=0
250    if(dtsets(idtset)%rfddk/=0  .or. dtsets(idtset)%rf2_dkdk/=0 .or. dtsets(idtset)%rf2_dkde/=0 .or. &
251 &   dtsets(idtset)%rfphon/=0 .or. dtsets(idtset)%rfelfd/=0 .or. &
252 &   dtsets(idtset)%rfstrs/=0 .or. dtsets(idtset)%rfuser/=0 .or. &
253 &   dtsets(idtset)%rfmagn/=0    ) response=1
254 
255 !  Compute mgfftdiel,npwdiel,nfftdiel for this data set
256    if((modulo(iprcel,100)>=20 .and.modulo(iprcel,100) < 71).or. iscf==-1)then
257 !    Get diecut, and the fft grid to be used for the susceptibility computation
258      diecut=abs(dtsets(idtset)%diecut)
259      if( dtsets(idtset)%diecut < zero )then
260        ecutsus=ecut
261      else
262        ecutsus= ( sqrt(ecut) *0.5_dp + sqrt(diecut) *0.25_dp )**2
263      end if
264 !    Beware, for the dielectric matrix fftalg=ngfftdiel(7) is default here
265      ngfftdiel(1:3)=0 ; ngfftdiel(7)=101 ; ngfftdiel(8:18)=dtsets(idtset)%ngfft(8:18)
266      if(iscf==-1)ngfftdiel(7)=102
267      ecut_eff=ecutsus*dilatmx**2
268      call getng(dtsets(idtset)%boxcutmin,ecut_eff,gmet,k0,me_fft,mgfftdiel,nfftdiel,&
269 &     ngfftdiel,nproc_fft,nsym,paral_fft,symrel,&
270 &     use_gpu_cuda=dtsets(idtset)%use_gpu_cuda)
271 !    Compute the size of the dielectric matrix : npwdiel
272      kpt_diel(1:3)=(/ 0.0_dp, 0.0_dp, 0.0_dp /)
273      ecut_eff=diecut*dilatmx**2
274      call getmpw(ecut_eff,exchn2n3d,gmet,(/1/),kpt_diel,mpi_enregs(idtset),npwdiel,1)
275    else
276      npwdiel=1 ; mgfftdiel=1 ; nfftdiel=1 ; ngfftdiel(1:8)=1
277    end if
278 
279 !  Special treatment for the value of mqgrid to be fed in memory.F90
280 
281    nptsgvec         = 200 ! At present, this has to be chosen once and for all ...
282    if ( dtsets(idtset)%usewvl == 0) then
283      call setmqgrid(mqgrid,mqgriddg,ecut_eff,ecutdg_eff,gprimd,nptsgvec,usepaw)
284    else
285      call setmqgrid(mqgrid,mqgriddg,one,one,gprimd,nptsgvec,usepaw)
286    end if
287    mqgrid_ff=mqgrid
288    if (usepaw==0) mqgrid_vl=mqgrid
289    if (usepaw==1) mqgrid_vl=mqgriddg
290 
291 !  Compute the memory needs for this data set.
292    if(response==0)then
293 
294      if (dtsets(idtset)%usewvl == 0) then
295        mkmem=dtsets(idtset)%mkmem
296        mband=maxval(dtsets(idtset)%nband(1:nkpt*nsppol))
297 
298        ! Don't perform memory tests if MBPT.
299        mem_test = dtsets(idtset)%mem_test
300        if (any(dtsets(idtset)%optdriver == [RUNL_SIGMA, RUNL_SCREENING, RUNL_BSE])) mem_test = 0
301 
302        call memory(n1xccc,extrapwf,getcell,idtset,dtsets(idtset)%icoulomb,&
303 &       intxc,dtsets(idtset)%ionmov,iout,densfor_pred,&
304 &       iprcel,iscf,jdtset,lmnmax_eff,lnmax_eff,mband,mffmem,dtsets(idtset)%mgfft,mgfftdiel,mgfftf,mkmem,&
305 &       mpi_enregs(idtset),mpsang,mpssoang,mpw,mqgrid_ff,mqgrid_vl,natom,nband,dtsets(idtset)%nfft,nfftdiel,nfftf,&
306 &       dtsets(idtset)%ngfft,ngfftdiel,ngfftf,dtsets(idtset)%nimage,nkpt,nloalg,npsp,npulayit,npwdiel,nspden,nspinor,&
307 &       nsppol,nsym,ntypat,occopt,optforces,mem_test,optstress,pawcpxocc,pawmixdg,&
308 &       pawnhatxc,pawspnorb,pawstgylm,prtvol,pspheads,dtsets(idtset)%tfkinfunc,&
309 &       dtsets(idtset)%typat,ucvol,usepaw,useylm,use_gpu_cuda,xclevel)
310      else if( dtsets(idtset)%usepaw==0) then
311        if (mpi_enregs(idtset)%me == 0) then
312          call wvl_memory(dtsets(idtset), idtset, mpi_enregs(idtset), npsp, 1, pspheads)
313        end if
314      end if
315 
316    else
317 !    Compute the value of cplex, for which one needs symrec
318      ABI_ALLOCATE(symq,(4,2,nsym))
319      ABI_ALLOCATE(symrec,(3,3,nsym))
320      do isym=1,nsym
321        call mati3inv(symrel(:,:,isym),symrec(:,:,isym))
322      end do
323      call littlegroup_q(nsym,qphon,symq,symrec,dtsets(idtset)%symafm,timrev)
324      cplex=2-timrev
325      ABI_DEALLOCATE(symq)
326      ABI_DEALLOCATE(symrec)
327      mkmems(1)=dtsets(idtset)%mkmem
328      mkmems(2)=dtsets(idtset)%mkqmem
329      mkmems(3)=dtsets(idtset)%mk1mem
330 
331      mem_test = dtsets(idtset)%mem_test
332 
333      call memorf(cplex,n1xccc,getcell,idtset,intxc,iout,iprcel,&
334 &     iscf,jdtset,lmnmax_eff,lnmax_eff,mband,mffmem,dtsets(idtset)%mgfft,&
335 &     mkmems,mpi_enregs(idtset),mpsang,mpssoang,mpw,mqgrid_ff,natom,nband,dtsets(idtset)%nfft,&
336 &     dtsets(idtset)%ngfft,nkpt,nloalg,nspden,nspinor,nsppol,nsym,&
337 &     ntypat,occopt,optddk,optphon,mem_test,optstrs,prtvol,useylm,use_gpu_cuda,xclevel)
338    end if
339 
340 !  Deallocate temporary arrays (when they will really be temporary !)
341    ABI_DEALLOCATE(nband)
342    ABI_DEALLOCATE(symrel)
343 
344  end do ! idtset
345 
346 end subroutine memory_eval

m_memeval/setmqgrid [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

  setmqgrid

FUNCTION

  Sets the number of points needed to represent the pseudopotentials in
  reciprocal space for a specified resolution.

INPUTS

  ecut=cutoff energy for the wavefunctions
  ecutdg=cutoff energy for the fine grid in case usepaw==1
  gprimd=primitive translation vectors for reciprocal space
  nptsgvec=number of points along the smallest primitive translation vector
    of the reciprocal space
  usepaw=1 if PAW is used, 0 otherwise

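OUTPUT

  (none; see SIDE EFFECTS)

SIDE EFFECTS

  mqgrid=number of points used in reciprocal space to describe the pseudopotentials.
    If 0 on input, it is set automatically (to at least 3001); otherwise it is left
    unchanged and a warning is issued when it is smaller than the estimated need.
  mqgriddg=same as mqgrid, for the fine grid; only examined when usepaw==1.
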
PARENTS

      m_psps,memory_eval

CHILDREN

SOURCE

subroutine setmqgrid(mqgrid,mqgriddg,ecut,ecutdg,gprimd,nptsgvec,usepaw)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'setmqgrid'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!mqgrid, mqgriddg: when 0 on entry, replaced by the automatic estimate (never below 3001);
! otherwise left untouched, with a warning if smaller than the estimate.
! mqgriddg (and ecutdg) are only examined when usepaw==1.
 integer , intent(inout)  :: mqgrid,mqgriddg
 integer , intent(in)  :: nptsgvec,usepaw
 real(dp), intent(in) :: ecut,ecutdg
 real(dp), intent(in) :: gprimd(3,3)

!Local variables-------------------------------
 integer :: mqgrid2,mqgriddg2
 real(dp) :: gmax,gmaxdg,gvecnorm
 character(len=500) :: message

! *************************************************************************

!Norm of the shortest of the three columns of gprimd
 gvecnorm=sqrt(min(dot_product(gprimd(:,1),gprimd(:,1)), &
& dot_product(gprimd(:,2),gprimd(:,2)), &
& dot_product(gprimd(:,3),gprimd(:,3))))

!Estimated number of points for the coarse grid, from ecut
 gmax=one/(sqrt2*pi)*sqrt(ecut)
 mqgrid2=ceiling(gmax/gvecnorm*nptsgvec)

 if (mqgrid == 0) then
!  Automatic choice, with a floor of 3001 points
   mqgrid=max(mqgrid2,3001)
   write(message, '(5a,i0,a)' )&
&   'The number of points "mqgrid" in reciprocal space used for the',ch10,&
&   'description of the pseudopotentials has been set automatically',ch10,&
&   'by abinit to: ',mqgrid,'.'
   !MSG_COMMENT(message)
 else if (mqgrid2>mqgrid) then
!  User-supplied value kept as is, but reported as too small
   write(message, '(3a,i8,3a,i8,3a)' )&
&   'The number of points "mqgrid" in reciprocal space used for the',ch10,&
&   'description of the pseudopotentials is : ',mqgrid,'.',ch10,&
&   'It would be better to increase it to at least ',mqgrid2,', or',ch10,&
&   'let abinit choose it automatically by setting mqgrid = 0.'
   MSG_WARNING(message)
 end if

 if (usepaw==1) then
   if(ecutdg<tol6)then
     write(message,'(a)')&
&     'The value of (paw)ecutdg is zero or negative, which is forbidden.'
     MSG_ERROR(message)
   end if

!  Estimated number of points for the fine grid: it must be derived from ecutdg
!  (through gmaxdg) in both branches below, not from the coarse-grid gmax.
   gmaxdg=one/(sqrt2*pi)*sqrt(ecutdg)
   mqgriddg2=ceiling(gmaxdg/gvecnorm*nptsgvec)

   if (mqgriddg == 0) then
     mqgriddg=max(mqgriddg2,3001)
     write(message, '(5a,i0,a)' )&
&     'The number of points "mqgriddg" in reciprocal space used for the',ch10,&
&     'description of the pseudopotentials has been set automatically',ch10,&
&     'by abinit to: ',mqgriddg,'.'
     !MSG_COMMENT(message)
   else if (mqgriddg2>mqgriddg) then
     write(message, '(3a,i8,3a,i8,3a)' )&
&     'The number of points "mqgriddg" in reciprocal space used for the',ch10,&
&     'description of the pseudopotentials (fine grid) is :',mqgriddg,'.',ch10,&
&     'It would be better to increase it to at least ',mqgriddg2,', or',ch10,&
&     'let abinit choose it automatically by setting mqgriddg = 0.'
     MSG_WARNING(message)
   end if
 end if

end subroutine setmqgrid

m_memeval/wvl_memory [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

 wvl_memory

FUNCTION

 Estimation of the memory needed for a wavelet-based computation job.
 According to the value of the option variable,
 might also try to allocate this amount of memory, and if it fails,
 might estimate the available memory.

INPUTS

  dtset=<type dataset_type>contains all input variables.
  idtset=number of the current dataset
  mpi_enreg=information about MPI parallelization
  npsp=number of pseudopotentials
  option: if 0, no test of available memory
          if 1, the routine tries to allocate the estimated memory, for testing
                purposes, and if a failure occurs, the routine stops.
          if 2, like 1, but before stopping, the routine will provide
                an estimation of the available memory.
  pspheads(npsp)=<type pspheader_type>all the important information from the
   pseudopotential file header, as well as the psp file name

OUTPUT

  (only writing)

NOTES

 The estimator is the one provided by BigDFT.

PARENTS

      memory_eval

CHILDREN

      atomic_info,createwavefunctionsdescriptors,deallocate_lr
      memoryestimator,mkradim,wrtout,wvl_descr_atoms_set,wvl_descr_free
      wvl_setboxgeometry,xred2xcart

SOURCE

subroutine wvl_memory(dtset, idtset, mpi_enreg, npsp, option, pspheads)

 use defs_wvltypes
 use m_abi2big, only : wvl_setBoxGeometry
 use m_wvl_descr_psp,    only : wvl_descr_free, wvl_descr_atoms_set

#if defined HAVE_BIGDFT
 use BigDFT_API, only: MemoryEstimator, createWavefunctionsDescriptors, deallocate_lr, &
      & atomic_info, memory_estimation
#endif

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'wvl_memory'
!End of the abilint section

  implicit none

!Arguments ------------------------------------
  !scalars
  !NOTE: in this routine, option is only range-checked; it is not forwarded to any callee.
  integer,intent(in) :: idtset, npsp, option
  type(dataset_type),intent(in) :: dtset
  type(MPI_type),intent(in) :: mpi_enreg
  !arrays
  type(pspheader_type),intent(in) :: pspheads(npsp)

!Local variables-------------------------------
#if defined HAVE_BIGDFT
  !scalars
  integer :: ityp, i, mu, nstates, me, nproc, comm
  character(len=500) :: message
  real(dp) :: ehomo, radfine
  type(wvl_internal_type) :: wvl
  type(memory_estimation) :: peakmem
  !arrays
  real(dp) :: acell(3), rprimd(3,3), rprim(3,3)
  real(dp), allocatable :: radii_cf(:,:)
  real(dp), allocatable :: xred(:,:), xcart(:,:)
#endif

! **************************************************************************

#if defined HAVE_BIGDFT

!Rank and size are taken in the wavelet communicator
 comm=mpi_enreg%comm_wvl
 me=xmpi_comm_rank(comm)
 nproc=xmpi_comm_size(comm)

!An out-of-range option is a programming error in the caller: abort instead of going on
 if(option<0 .or. option>2)then
   write(message, '(a,i0,a)')&
&   'option=',option,' while the only allowed values are 0, 1, or 2.'
   MSG_BUG(message)
 end if

!Start from a clean wvl object (no PAW, all pointers disassociated)
 wvl%paw%usepaw=0 !no PAW here
 nullify(wvl%rholoc%d)
 nullify(wvl%rholoc%msz)
 nullify(wvl%rholoc%rad)
 nullify(wvl%rholoc%radius)
 nullify(wvl%paw%spsi)
 nullify(wvl%paw%indlmn)

 write(message,*)' wvl_memory : analysis of memory needs '
 call wrtout(std_out,message,'COLL')

!Header line; the integer edit descriptor is widened for dataset indices >= 100
 if(idtset>=100)then
   write(message,'(80a,a,a,i5,a)')('=',mu=1,80),ch10,&
&   ' Values of the parameters that define the memory need for DATASET', idtset,&
&   ' (WVL).'
 else if(idtset/=0)then
   write(message,'(80a,a,a,i3,a)')('=',mu=1,80),ch10,&
&   ' Values of the parameters that define the memory need for DATASET', idtset,&
&   ' (WVL).'
 else
   write(message,'(80a,a,a,a)')('=',mu=1,80),ch10,&
&   ' Values of the parameters that define the memory need of the present run',&
&   ' (WVL).'
 end if
 call wrtout(ab_out,message,'COLL')
 call wrtout(std_out,message,'COLL')

 write(message,'( a,f7.3,a,i7,2(a,F7.3),a,a,f7.3,a,i7 )' ) &
& '  wvl_hgrid =', dtset%wvl_hgrid , '   nwfshist =', dtset%nwfshist, &
& ' wvl_crmult =', dtset%wvl_crmult, ' wvl_frmult =', dtset%wvl_frmult, ch10,&
& '  tl_radius =', dtset%tl_radius , '  tl_nprccg =', dtset%tl_nprccg
 call wrtout(ab_out,message,'COLL')
 call wrtout(std_out,message,'COLL')

!nstates is only printed below; MemoryEstimator receives dtset%mband directly
 if (dtset%nsppol == 2) then
   nstates = int(dtset%nelect)
 else
   nstates = dtset%mband
 end if
 write(message,'(4(a,i7))')&
& '      natom =', dtset%natom, '     ntypat =', dtset%ntypat, &
& '    nstates =', nstates,     '     nsppol =', dtset%nsppol
 call wrtout(ab_out,message,'COLL')
 call wrtout(std_out,message,'COLL')

 write(message,'(80a)') ('=',mu=1,80)
 call wrtout(ab_out,message,'COLL')
 call wrtout(std_out,message,'COLL')

!First, use eleconf to get radii_cf().
!NOTE(review): radii_cf is allocated with 3 columns but only columns 1 and 2 are
!assigned here; column 3 is handed to the routines below as is. TODO confirm whether
!they read it.
 ABI_ALLOCATE(radii_cf,(npsp, 3))
 do ityp = 1, npsp, 1
   call atomic_info(int(pspheads(ityp)%znuclpsp), int(pspheads(ityp)%zionpsp), ehomo = ehomo)

!  new method for assigning the radii
   radii_cf(ityp, 1) = one / sqrt(abs(two * ehomo))
!  Second radius: smallest non-zero GTHradii entry, capped at 100
   radfine = 100.0_dp
   do i = 0, 4, 1
     if (pspheads(ityp)%GTHradii(i) /= zero) then
       radfine = min(radfine, pspheads(ityp)%GTHradii(i))
     end if
   end do
   radii_cf(ityp,2) = radfine
 end do

!Compute the shifted positions and acell
 acell = dtset%acell_orig(1:3,1)
 call wvl_descr_atoms_set(acell, dtset%icoulomb, dtset%natom, dtset%ntypat, dtset%typat, wvl)
 ABI_ALLOCATE(xred,(3, dtset%natom))
 xred = dtset%xred_orig(:,:,1)
 rprimd = dtset%rprimd_orig(1:3,1:3,1)
 wvl%h(:) = dtset%wvl_hgrid
 call wvl_setBoxGeometry(1, radii_cf, rprimd, xred, &
& wvl, dtset%wvl_crmult, dtset%wvl_frmult)
!Compute acell and rprim from rprimd
 call mkradim(acell,rprim,rprimd)
 ABI_ALLOCATE(xcart,(3, dtset%natom))
 call xred2xcart(dtset%natom, rprimd, xcart, xred)
 call createWavefunctionsDescriptors(me, wvl%h(1), wvl%h(2), wvl%h(3), &
& wvl%atoms, xcart, radii_cf, dtset%wvl_crmult, dtset%wvl_frmult, wvl%Glr)
 call MemoryEstimator(nproc, dtset%nwfshist, wvl%Glr, &
& dtset%mband, dtset%nspinor, dtset%nkpt, 0, dtset%nsppol, &
& 0, dtset%iscf, peakmem)

!Release everything that was built for the estimation
 call deallocate_lr(wvl%Glr)
 call wvl_descr_free(wvl)
 ABI_DEALLOCATE(radii_cf)
 ABI_DEALLOCATE(xred)
 ABI_DEALLOCATE(xcart)

 write(message,'(80a,a)') ('=',mu=1,80), ch10
 call wrtout(ab_out,message,'COLL')
 call wrtout(std_out,message,'COLL')

#else
 BIGDFT_NOTENABLED_ERROR()
!Dummy reference to the arguments, never executed
 if (.false.) write(std_out,*) idtset,npsp,option,dtset%nstep,mpi_enreg%nproc,pspheads(1)%zionpsp
#endif

end subroutine wvl_memory