TABLE OF CONTENTS
- ABINIT/m_memeval
- m_memeval/getdim_nloc
- m_memeval/memana
- m_memeval/memorf
- m_memeval/memory
- m_memeval/memory_eval
- m_memeval/setmqgrid
- m_memeval/wvl_memory
ABINIT/m_memeval [ Modules ]
NAME
m_memeval
FUNCTION
Functions to estimate memory requirements from the calculation parameters.
COPYRIGHT
Copyright (C) 2008-2024 ABINIT group (XG, DC, DW) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
16 #if defined HAVE_CONFIG_H 17 #include "config.h" 18 #endif 19 20 #include "abi_common.h" 21 22 MODULE m_memeval 23 24 use defs_basis 25 use m_abicore 26 use m_xmpi 27 use m_errors 28 use m_dtset 29 30 use defs_datatypes, only : pspheader_type 31 use defs_abitypes, only : MPI_type 32 use m_geometry, only : mkradim, mkrdim, xred2xcart, metric 33 use m_symtk, only : mati3inv, littlegroup_q 34 use m_spgdata, only : prtspgroup 35 use m_fftcore, only : getng 36 use m_kg, only : getmpw 37 use m_libpaw_tools, only : libpaw_write_comm_set 38 39 implicit none 40 41 private
m_memeval/getdim_nloc [ Functions ]
[ Top ] [ m_memeval ] [ Functions ]
NAME
getdim_nloc
FUNCTION
Determine the dimensions of arrays that contain the definition of non-local projectors : ekb, ffspl, indlmn
INPUTS
mixalch(npspalch,ntypalch,nimage)=alchemical mixing coefficients nimage=number of images npsp=number of pseudopotentials npspalch=number of pseudopotentials for alchemical purposes ntypat=number of types of pseudo atoms ntypalch=number of types of alchemical pseudo atoms pspheads(npsp)=<type pspheader_type>all the important information from the pseudopotential file headers, as well as the psp file names
OUTPUT
lmnmax=maximum number of l,m,n projectors, not taking into account the spin-orbit lmnmaxso=maximum number of l,m,n projectors, taking into account the spin-orbit lnmax=maximum number of l,n projectors, not taking into account the spin-orbit lnmaxso=maximum number of l,n projectors, taking into account the spin-orbit
SOURCE
2259 subroutine getdim_nloc(lmnmax,lmnmaxso,lnmax,lnmaxso,mixalch,nimage,npsp,npspalch,& 2260 & ntypat,ntypalch,pspheads) 2261 2262 !Arguments ------------------------------------ 2263 !scalars 2264 integer,intent(in) :: nimage,npsp,npspalch,ntypalch,ntypat 2265 integer,intent(out) :: lmnmax,lmnmaxso,lnmax,lnmaxso 2266 !arrays 2267 real(dp),intent(in) :: mixalch(npspalch,ntypalch,nimage) 2268 type(pspheader_type),intent(in) :: pspheads(npsp) 2269 2270 !Local variables------------------------------- 2271 !scalars 2272 integer :: ilang,ipsp,ipspalch,itypalch,itypat,ntyppure 2273 !integer :: llmax 2274 character(len=500) :: msg 2275 !arrays 2276 integer,allocatable :: lmnproj_typat(:),lmnprojso_typat(:),lnproj_typat(:) 2277 integer,allocatable :: lnprojso_typat(:),nproj_typat(:,:),nprojso_typat(:,:) 2278 2279 ! ************************************************************************* 2280 2281 !write(std_out,*)' getdim_nloc: 'pspheads(1)%nproj(0:3)=',pspheads(1)%nproj(0:3) 2282 2283 ABI_MALLOC(lmnproj_typat,(ntypat)) 2284 ABI_MALLOC(lmnprojso_typat,(ntypat)) 2285 ABI_MALLOC(lnproj_typat,(ntypat)) 2286 ABI_MALLOC(lnprojso_typat,(ntypat)) 2287 ABI_MALLOC(nproj_typat,(0:3,ntypat)) 2288 ABI_MALLOC(nprojso_typat,(3,ntypat)) 2289 lmnproj_typat(:)=0 ; lmnprojso_typat(:)=0 2290 lnproj_typat(:)=0 ; lnprojso_typat(:)=0 2291 nproj_typat(:,:)=0 ; nprojso_typat(:,:)=0 2292 2293 ntyppure=ntypat-ntypalch 2294 2295 !For each type of pseudo atom, compute the number of projectors 2296 !First, pure pseudo atoms 2297 if(ntyppure>0)then 2298 do itypat=1,ntyppure 2299 nproj_typat(0:3,itypat)=pspheads(itypat)%nproj(0:3) 2300 nprojso_typat(:,itypat)=pspheads(itypat)%nprojso(:) 2301 end do 2302 end if 2303 2304 !Then, alchemical pseudo atoms 2305 if(ntypalch>0)then 2306 do itypat=ntyppure+1,ntypat 2307 itypalch=itypat-ntyppure 2308 do ipsp=ntyppure+1,npsp 2309 ipspalch=ipsp-ntyppure 2310 ! If there is some mixing, must accumulate the projectors 2311 if(sum(abs(mixalch(ipspalch,itypalch,:)))>tol10)then 2312 nproj_typat(0:3,itypat)=nproj_typat(0:3,itypat)+pspheads(ipsp)%nproj(0:3) 2313 nprojso_typat(:,itypat)=nprojso_typat(:,itypat)+pspheads(ipsp)%nprojso(:) 2314 end if 2315 end do 2316 end do 2317 end if 2318 2319 !Now that the number of projectors is known, accumulate the dimensions 2320 do itypat=1,ntypat 2321 do ilang=0,3 2322 lnproj_typat(itypat)=lnproj_typat(itypat)+nproj_typat(ilang,itypat) 2323 lmnproj_typat(itypat)=lmnproj_typat(itypat)+nproj_typat(ilang,itypat)*(2*ilang+1) 2324 end do 2325 lnprojso_typat(itypat)=lnproj_typat(itypat) 2326 lmnprojso_typat(itypat)=lmnproj_typat(itypat) 2327 do ilang=1,3 2328 lnprojso_typat(itypat)=lnprojso_typat(itypat)+nprojso_typat(ilang,itypat) 2329 lmnprojso_typat(itypat)=lmnprojso_typat(itypat)+nprojso_typat(ilang,itypat)*(2*ilang+1) 2330 end do 2331 end do 2332 2333 !Compute the maximal bounds, at least equal to 1, even for local psps 2334 lmnmax=1;lmnmaxso=1;lnmax=1;lnmaxso=1 2335 do itypat=1,ntypat 2336 lmnmax =max(lmnmax ,lmnproj_typat (itypat)) 2337 lmnmaxso=max(lmnmaxso,lmnprojso_typat(itypat)) 2338 lnmax =max(lnmax ,lnproj_typat (itypat)) 2339 lnmaxso =max(lnmaxso ,lnprojso_typat (itypat)) 2340 end do 2341 !The initial coding (below) was not totally portable (MT 110215) 2342 !lmnmax=max(maxval(lmnproj_typat(1:ntypat)),1) 2343 !lmnmaxso=max(maxval(lmnprojso_typat(1:ntypat)),1) 2344 !lnmax=max(maxval(lnproj_typat(1:ntypat)),1) 2345 !lnmaxso=max(maxval(lnprojso_typat(1:ntypat)),1) 2346 2347 if(maxval(lmnproj_typat(1:ntypat))==0)then 2348 write(msg, '(3a)' )& 2349 'Despite there is only a local part to pseudopotential(s),',ch10,& 2350 'lmnmax and lnmax are set to 1.' 2351 ABI_COMMENT(msg) 2352 end if 2353 2354 !XG040806 : These lines make modifications of lnmax and lmnmax 2355 !that are unjustified in many cases, according to the many tests cases 2356 !where they produce a changes, while the test case was working properly. 2357 !One should understand better the needs, and code more appropriate changes ... 2358 !lnmax/lmnmax has to be bigger than 1+lmax (for compatibility reasons) 2359 !llmax=maxval(pspheads(1:ntypat)%lmax)+1 ! And this line might have trouble with HP compiler 2360 !if (lnmax <llmax) lnmax=llmax 2361 !if (lnmaxso <llmax) lnmaxso=llmax 2362 !if (lmnmax <llmax) lmnmax=llmax 2363 !if (lmnmaxso<llmax) lmnmaxso=llmax 2364 2365 write(msg, '(a,a,i4,a,i4,3a,i4,a,i4,a)' ) ch10,& 2366 ' getdim_nloc: deduce lmnmax =',lmnmax,', lnmax =',lnmax,',',ch10,& 2367 ' lmnmaxso=',lmnmaxso,', lnmaxso=',lnmaxso,'.' 2368 call wrtout(std_out,msg) 2369 2370 ABI_FREE(lmnproj_typat) 2371 ABI_FREE(lmnprojso_typat) 2372 ABI_FREE(lnproj_typat) 2373 ABI_FREE(lnprojso_typat) 2374 ABI_FREE(nproj_typat) 2375 ABI_FREE(nprojso_typat) 2376 2377 end subroutine getdim_nloc
m_memeval/memana [ Functions ]
[ Top ] [ m_memeval ] [ Functions ]
NAME
memana
FUNCTION
Analysis of the memory and disk space needed for the job, thanks to the data computed in the calling routine: for each array, the number of blocks of size mpw or nfft bytes, and the additional memory occupation; the list of arrays that are used for each chain. According to the value of the option variable, the routine will eventually try to allocate this amount of memory, and if it fails, estimate the maximum value nfft compatible with the available memory.
INPUTS
cadd(marrays)= count of bytes needed in addition of cmpw, cfftc and cfft. cfft(marrays) =for each array, count of blocks of size nfft bytes (coarse grid, if PAW) cfftf(marrays)=for each array, count of blocks of size nfft bytes (fine grid, if PAW) chain(marrays,nchain)=logical variable, that informs whether an array belongs to a given chain. cmpw(marrays)=for each array, count of blocks of size mpw bytes. dttyp(marrays)=datatype of the array : 4 for integers, 8 for real(dp) iout=unit number for output of formatted data. iprcel=govern the choice of preconditioner for the SCF cycle iscf=governs the choice of SCF algorithm, or non-SCF calculation. marrays=maximal number of arrays (or group of arrays) to be monitored. mbcg=number of MB needed for the cg array. mbdiskpd=number of MB needed to store a density or potential file on disk mbdiskwf=number of MB needed to store a wavefunction file on disk mbf_fftgr=number of MB needed for the f_fftgr array. mbgylm=number of MB needed for the pawfgrtab%gylm array (paw only) mffmem =governs the number of FFT arrays which are fit in core memory mpw =maximum number of planewaves in basis sphere (large number) natom =number of atoms in unit cell nchain=number of chains to be used in the estimation of memory. nfft =(effective) number of FFT grid points (for one processor) (coarse grid, if PAW) nfftf=(effective) number of FFT grid points (for one processor) (fine grid, if PAW) occopt=option for occupation numbers. If 3<=occopt<=8, varying occupation option : if 0 , no test of available memory if 1 , the routine tries to allocate the estimated memory, for testing purposes, and if a failure occurs, the routine stops. if 2 , like 1, but before stopping, the routine will provide an estimation of the available memory. prtvol=control print volume
OUTPUT
(only writing)
SOURCE
1369 subroutine memana(cadd,cfft,cfftf,chain,cmpw,dttyp,iout,iprcel,iscf,& 1370 & marrays,mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm,mffmem,& 1371 & mpw,natom,nchain,nfft,nfftf,occopt,option,prtvol) 1372 1373 !Arguments ------------------------------------ 1374 !scalars 1375 integer,intent(in) :: iout,iprcel,iscf,marrays,mffmem,mpw,natom,nchain 1376 integer,intent(in) :: nfft,nfftf,occopt,option,prtvol 1377 real(dp),intent(in) :: mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm 1378 !arrays 1379 integer,intent(in) :: dttyp(marrays) 1380 logical,intent(in) :: chain(marrays,nchain) 1381 real(dp),intent(in) :: cadd(marrays),cfft(marrays),cfftf(marrays),cmpw(marrays) 1382 1383 !Local variables------------------------------- 1384 !scalars 1385 integer :: biggest,ichain,ier,ier1,ier2,ier3,ier4,ier5,ier6,ier7,ier8,ii 1386 !integer :: jj,kk 1387 integer :: mu,nmbytes,nquarter_mbytes,quit 1388 real(dp) :: mbbigarr,mbbiggest 1389 character(len=500) :: msg 1390 !arrays 1391 real(dp),allocatable :: bigarray(:,:),bigarray1(:,:),bigarray2(:,:) 1392 real(dp),allocatable :: bigarray3(:,:),bigarray4(:,:),bigarray5(:,:) 1393 real(dp),allocatable :: bigarray6(:,:),bigarray7(:,:),bigarray8(:,:) 1394 real(dp),allocatable :: cdpadd(:),cdpfft(:),cdpfftf(:),cdpmpw(:) 1395 real(dp),allocatable :: cintfft(:),cintfftf(:),cintmpw(:),cintadd(:) 1396 real(dp),allocatable :: mbdpadd(:),mbdpfft(:),mbdpfftf(:) 1397 real(dp),allocatable :: mbdpmpw(:),mbintadd(:),mbintfft(:),mbintfftf(:) 1398 real(dp),allocatable :: mbintmpw(:),mbother(:),mbtot(:) 1399 1400 ! ************************************************************************** 1401 1402 !write(std_out,*)' memana : nchain=',nchain 1403 1404 ABI_MALLOC(cdpfftf,(nchain)) 1405 ABI_MALLOC(cdpfft,(nchain)) 1406 ABI_MALLOC(cdpmpw,(nchain)) 1407 ABI_MALLOC(cintfftf,(nchain)) 1408 ABI_MALLOC(cintfft,(nchain)) 1409 ABI_MALLOC(cintmpw,(nchain)) 1410 ABI_MALLOC(cdpadd,(nchain)) 1411 ABI_MALLOC(cintadd,(nchain)) 1412 ABI_MALLOC(mbdpadd,(nchain)) 1413 ABI_MALLOC(mbdpfftf,(nchain)) 1414 ABI_MALLOC(mbdpfft,(nchain)) 1415 ABI_MALLOC(mbdpmpw,(nchain)) 1416 ABI_MALLOC(mbintadd,(nchain)) 1417 ABI_MALLOC(mbintfftf,(nchain)) 1418 ABI_MALLOC(mbintfft,(nchain)) 1419 ABI_MALLOC(mbintmpw,(nchain)) 1420 ABI_MALLOC(mbother,(nchain)) 1421 ABI_MALLOC(mbtot,(nchain)) 1422 1423 biggest=0 1424 mbbiggest=0.0_dp 1425 1426 !For each chain, compute the number of bytes 1427 do ichain=1,nchain 1428 1429 ! First, the number of integer or real(dp), fft, mpw or add blocks 1430 cdpmpw(ichain) =sum(cmpw(:),MASK=(dttyp(:)==8).and.chain(:,ichain)) 1431 cintmpw(ichain)=sum(cmpw(:),MASK=(dttyp(:)==4).and.chain(:,ichain)) 1432 cdpfftf(ichain) =sum(cfftf(:),MASK=(dttyp(:)==8).and.chain(:,ichain)) 1433 cintfftf(ichain)=sum(cfftf(:),MASK=(dttyp(:)==4).and.chain(:,ichain)) 1434 cdpfft(ichain) =sum(cfft(:),MASK=(dttyp(:)==8).and.chain(:,ichain)) 1435 cintfft(ichain)=sum(cfft(:),MASK=(dttyp(:)==4).and.chain(:,ichain)) 1436 cdpadd(ichain) =sum(cadd(:),MASK=(dttyp(:)==8).and.chain(:,ichain)) 1437 cintadd(ichain)=sum(cadd(:),MASK=(dttyp(:)==4).and.chain(:,ichain)) 1438 1439 ! Compute the corresponding number of Mbytes 1440 mbdpmpw(ichain) =8*cdpmpw(ichain) *dble(mpw) /1024._dp**2 1441 mbintmpw(ichain)=4*cintmpw(ichain)*dble(mpw) /1024._dp**2 1442 mbdpfftf(ichain) =8*cdpfftf(ichain) *dble(nfftf)/1024._dp**2 1443 mbintfftf(ichain)=4*cintfftf(ichain)*dble(nfftf)/1024._dp**2 1444 mbdpfft(ichain) =8*cdpfft(ichain) *dble(nfft)/1024._dp**2 1445 mbintfft(ichain)=4*cintfft(ichain)*dble(nfft)/1024._dp**2 1446 mbdpadd(ichain) =8*cdpadd(ichain) /1024._dp**2 1447 mbintadd(ichain)=4*cintadd(ichain) /1024._dp**2 1448 mbother(ichain) =dble(231+6*natom)/1024._dp 1449 if(3<=occopt .and. occopt<=8)mbother(ichain)=dble(991+natom)/1024._dp 1450 1451 ! Compute the total number of Mbytes 1452 mbtot(ichain)=mbdpmpw(ichain)+mbintmpw(ichain)& 1453 & +mbdpfftf(ichain)+mbintfftf(ichain)& 1454 & +mbdpfft(ichain)+mbintfft(ichain)& 1455 & +mbdpadd(ichain)+mbintadd(ichain)+mbother(ichain) 1456 1457 ! Select the biggest chain 1458 if(mbtot(ichain)>mbbiggest)then 1459 mbbiggest=mbtot(ichain) 1460 biggest=ichain 1461 end if 1462 end do 1463 !When iprcel<20, the biggest chains cannot be number 8 or 9 ... 1464 if(modulo(iprcel,100)<20 .and. (biggest==8 .or. biggest==9))then 1465 write(msg,'(a,a,a,a,i3,a,a,a)') ch10,& 1466 & ' memana: BUG -',ch10,& 1467 & ' The biggest chain is number',biggest,' while iprcel==20.',ch10,& 1468 & ' This is not allowed.' 1469 call wrtout(std_out,msg) 1470 end if 1471 1472 write(msg, '(a,f11.3,a)' ) & 1473 & 'P This job should need less than ',& 1474 & mbbiggest+tol10,' Mbytes of memory. ' 1475 call wrtout(std_out,msg) 1476 call wrtout(iout,msg) 1477 1478 if(prtvol>=10)then 1479 if(biggest==1)write(msg,'(a)')'P Max. in main chain + fourwf.f ' 1480 if(biggest==2)write(msg,'(a)')'P Max. in main chain + nonlop.f + opernl.f ' 1481 if(biggest==3)write(msg,'(a)')'P Max. in XC chain ' 1482 if(biggest==4)write(msg,'(a)')'P Max. in mkrho chain ' 1483 if(biggest==5)write(msg,'(a)')'P Max. in fourdp chain ' 1484 if(biggest==6)write(msg,'(a)')'P Max. in parallel k-point chain ' 1485 if(biggest==7)write(msg,'(a)')'P Max. in newvtr chain ' 1486 if(biggest==8)write(msg,'(a)')'P Max. in suscep chain ' 1487 if(biggest==9)write(msg,'(a)')'P Max. in dielmt chain ' 1488 if(biggest==10)write(msg,'(a)')'P Max. in tddft chain ' 1489 call wrtout(iout,msg) 1490 1491 write(msg, '(a,i13,a,f11.3,a)' )& 1492 & 'P',nint(cintmpw(biggest)),' blocks of mpw integer numbers, for',& 1493 & mbintmpw(biggest)+tol10,' Mbytes. ' 1494 call wrtout(iout,msg) 1495 write(msg, '(a,i13,a,f11.3,a)' )& 1496 & 'P',nint(cdpmpw(biggest)),' blocks of mpw real(dp) numbers, for',& 1497 & mbdpmpw(biggest)+tol10,' Mbytes. ' 1498 call wrtout(iout,msg) 1499 if (nfft==nfftf) then 1500 if(mbintfft(biggest)+mbintfftf(biggest)>0.001)then 1501 write(msg, '(a,i13,a,f11.3,a)' )& 1502 & 'P',nint(cintfft(biggest)+cintfftf(biggest)),' blocks of nfft integer numbers, for',& 1503 & mbintfft(biggest)+mbintfftf(biggest)+tol10,' Mbytes. ' 1504 call wrtout(iout,msg) 1505 end if 1506 write(msg, '(a,i13,a,f11.3,a)' )& 1507 & 'P',nint(cdpfft(biggest)+cdpfftf(biggest)),' blocks of nfft real(dp) numbers, for',& 1508 & mbdpfft(biggest)+mbdpfftf(biggest)+tol10,' Mbytes. ' 1509 call wrtout(iout,msg) 1510 else 1511 if(mbintfftf(biggest)>0.001)then 1512 write(msg, '(a,i13,a,f11.3,a)' )& 1513 & 'P',nint(cintfftf(biggest)),' blocks of nfft (fine grid) integer numbers, for',& 1514 & mbintfftf(biggest)+tol10,' Mbytes. ' 1515 call wrtout(iout,msg) 1516 end if 1517 write(msg, '(a,i13,a,f11.3,a)' )& 1518 & 'P',nint(cdpfftf(biggest)),' blocks of nfft (fine grid) real(dp) numbers, for',& 1519 & mbdpfftf(biggest)+tol10,' Mbytes. ' 1520 call wrtout(iout,msg) 1521 if(mbintfft(biggest)>0.001)then 1522 write(msg, '(a,i13,a,f11.3,a)' )& 1523 & 'P',nint(cintfft(biggest)),' blocks of nfft (coarse grid) integer numbers, for',& 1524 & mbintfft(biggest)+tol10,' Mbytes. ' 1525 call wrtout(iout,msg) 1526 end if 1527 write(msg, '(a,i13,a,f11.3,a)' )& 1528 & 'P',nint(cdpfft(biggest)),' blocks of nfft (coarse grid) real(dp) numbers, for',& 1529 & mbdpfft(biggest)+tol10,' Mbytes. ' 1530 call wrtout(iout,msg) 1531 end if 1532 if(mbintadd(biggest)>0.001)then 1533 write(msg, '(a,13x,a,f11.3,a)' )'P',' Additional integer numbers, for',mbintadd(biggest)+tol10,' Mbytes. ' 1534 call wrtout(iout,msg) 1535 end if 1536 write(msg, '(a,13x,a,f11.3,a)' )'P',' Additional real(dp) numbers, for',mbdpadd(biggest)+tol10,' Mbytes. ' 1537 call wrtout(iout,msg) 1538 write(msg, '(a,13x,a,f11.3,a)' )'P',' With residue estimated to be ',mbother(biggest)+tol10,' Mbytes. ' 1539 call wrtout(iout,msg) 1540 write(msg, '(a)' )'P' 1541 call wrtout(iout,msg) 1542 write(msg, '(a)' )'P Comparison of the memory needs of different chains' 1543 call wrtout(iout,msg) 1544 1545 write(msg, '(a,f11.3,a)' )'P Main chain + fourwf.f ',mbtot(1)+tol10,' Mbytes. ' 1546 call wrtout(iout,msg) 1547 write(msg, '(a,f11.3,a)' )'P Main chain + nonlop.f + opernl.f',mbtot(2)+tol10,' Mbytes. ' 1548 call wrtout(iout,msg) 1549 1550 ! The next chains are not defined in the RF case. 1551 if(nchain>2)then 1552 write(msg, '(a,f11.3,a)' )'P XC chain ',mbtot(3)+tol10,' Mbytes. ' 1553 call wrtout(iout,msg) 1554 write(msg, '(a,f11.3,a)' )& 1555 & 'P mkrho chain ',mbtot(4)+tol10,' Mbytes. ' 1556 call wrtout(iout,msg) 1557 write(msg, '(a,f11.3,a)' )& 1558 & 'P fourdp chain ',mbtot(5)+tol10,' Mbytes. ' 1559 call wrtout(iout,msg) 1560 if(xmpi_paral==1)then 1561 write(msg, '(a,f11.3,a)' )& 1562 & '- parallel k-point chain ',mbtot(6)+tol10,' Mbytes. ' 1563 call wrtout(iout,msg) 1564 end if 1565 write(msg, '(a,f11.3,a)' )& 1566 & 'P newvtr chain ',mbtot(7)+tol10,' Mbytes. ' 1567 call wrtout(iout,msg) 1568 if(modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70)then 1569 write(msg, '(a,f11.3,a)' )& 1570 & 'P suscep chain ',mbtot(8)+tol10,' Mbytes. ' 1571 call wrtout(iout,msg) 1572 write(msg, '(a,f11.3,a)' )& 1573 & 'P dielmt chain ',mbtot(9)+tol10,' Mbytes. ' 1574 call wrtout(iout,msg) 1575 end if 1576 if(iscf==-1)then 1577 write(msg, '(a,f11.3,a)' )& 1578 & 'P tddft chain ',mbtot(10)+tol10,' Mbytes. ' 1579 end if 1580 end if ! nchain>2 1581 1582 end if 1583 1584 !-------------------------------------------------------------------- 1585 1586 write(msg, '(a)' ) ' Rough estimation (10% accuracy) of disk space for files :' 1587 call wrtout(iout,msg) 1588 call wrtout(std_out,msg) 1589 1590 write(msg, '(a,f11.3,a,a,f11.3,a)' ) & 1591 & '_ WF disk file :',mbdiskwf+tol10,' Mbytes ;',& 1592 & ' DEN or POT disk file :',mbdiskpd+tol10,' Mbytes.' 1593 call wrtout(iout,msg) 1594 call wrtout(std_out,msg) 1595 1596 if(mffmem==0 .and. iscf>0)then 1597 if(iscf==1)then 1598 write(msg, '(a,a,a)' )& 1599 & ' mffmem==0, iscf==1 => use of 1 FFT temporary disk file,',ch10,& 1600 & ' 5 times bigger than a DEN file.' 1601 else if(iscf==2.or.iscf==12)then 1602 write(msg, '(a,a,a)' )& 1603 & ' mffmem==0, iscf==2 => use of 1 FFT temporary disk file,',ch10,& 1604 & ' 3 times bigger than a DEN file.' 1605 else if(iscf==3.or.iscf==13)then 1606 write(msg, '(a,a,a)' )& 1607 & ' mffmem==0, iscf==3 => use of 1 FFT temporary disk file,',ch10,& 1608 & ' 4 times bigger than a DEN file.' 1609 else if(iscf==4.or.iscf==14)then 1610 write(msg, '(a,a,a)' )& 1611 & ' mffmem==0, iscf==4 => use of 1 FFT temporary disk file,',ch10,& 1612 & ' 6 times bigger than a DEN file.' 1613 else if(iscf==5)then 1614 write(msg, '(a,a,a)' )& 1615 & ' mffmem==0, iscf==5 => use of 1 FFT temporary disk file,',ch10,& 1616 & ' 10 times bigger than a DEN file.' 1617 else if(iscf==6)then 1618 write(msg, '(a,a,a)' )& 1619 & ' mffmem==0, iscf==6 => use of 1 FFT temporary disk file,',ch10,& 1620 & ' 10 times bigger than a DEN file.' 1621 else if(iscf==7.or.iscf==17)then 1622 write(msg, '(a,a,a)' )& 1623 & ' mffmem==0, iscf==7 => use of 1 FFT temporary disk file,',ch10,& 1624 & ' (2+2*npulayit) times bigger than a DEN file.' 1625 end if 1626 call wrtout(iout,msg) 1627 call wrtout(std_out,msg) 1628 end if 1629 1630 !Temporary msg - estimation of PAW specific data has to be done... 1631 !Have to add the usepaw argument to use this. 1632 !if (usepaw==1) then 1633 !write(msg,'(5a)') ' WARNING: You are using PAW formalism;',ch10,& 1634 !& ' Above estimations do not take PAW',ch10,& 1635 !& ' specific data into account !' 1636 !call wrtout(iout,msg) 1637 !call wrtout(std_out,msg) 1638 !end if 1639 1640 write(msg,'(80a,a)') ('=',mu=1,80),ch10 1641 call wrtout(iout,msg) 1642 call wrtout(std_out,msg) 1643 1644 !-------------------------------------------------------------------- 1645 !Here, each processor must test its memory, so use 1646 !the PERS mode for error msgs, followed by synchronisation 1647 1648 mbbigarr=max(mbf_fftgr,mbcg,mbgylm) 1649 if(mbbigarr==mbcg) then 1650 write(msg, '(a,f12.4,a)' ) ' Biggest array : cg(disk), with',mbcg+tol10,' MBytes.' 1651 else if (mbbigarr==mbf_fftgr) then 1652 write(msg, '(a,f12.4,a)' ) ' Biggest array : f_fftgr(disk), with',mbf_fftgr+tol10,' MBytes.' 1653 else if (mbbigarr==mbgylm)then 1654 write(msg, '(a,f12.4,a)' ) ' Biggest array : pawfgrtab%gylm(gr), with',mbgylm+tol10,' MBytes.' 1655 end if 1656 call wrtout(std_out,msg) 1657 1658 !if (mpi_enreg%my_nimage>1) then 1659 !write(msg, '(a,f12.4,a)' ) & 1660 !& ' These estimations take the distribution over replicas (images) of the cell into account.' 1661 !call wrtout(std_out,msg) 1662 !end if 1663 1664 quit=0 1665 1666 if(option>=1)then 1667 1668 ! Test the ability to allocate the biggest array 1669 nquarter_mbytes=4.0_dp*mbbigarr+1.0_dp 1670 ABI_STAT_MALLOC(bigarray,(32*1024,nquarter_mbytes), ier) 1671 if(ier/=0)then 1672 write(msg,'(a,f11.3,a,a,a,a,a,a,a)')& 1673 & 'Test failed to allocate an array of',mbbigarr,' Mbytes',ch10,& 1674 & 'It is not worth to continue ',ch10,& 1675 & 'Action: modify input variable to fit the available memory,',ch10,& 1676 & 'increase limit on maximal array size or set mem_test to 0 to disable this test.' 1677 call wrtout(std_out,msg,'PERS') 1678 if(option==1)then 1679 ABI_ERROR_CLASS(msg, "MemanaError") 1680 else 1681 ABI_WARNING(msg) 1682 quit=1 1683 end if 1684 end if 1685 if(quit==0)then 1686 write(msg,'(a,f11.3,a)')' memana : allocated an array of',mbbigarr+tol10,' Mbytes, for testing purposes. ' 1687 call wrtout(std_out,msg) 1688 end if 1689 1690 ABI_SFREE(bigarray) 1691 1692 ! Test the ability to allocate the needed total memory : use 8 segments, 1693 ! hoping that the maximal segment size is not so much smaller than the 1694 ! total memory 1695 nquarter_mbytes=0.5_dp*mbbiggest+1.0_dp 1696 ABI_STAT_MALLOC(bigarray1,(32*1024,nquarter_mbytes), ier1) 1697 ABI_STAT_MALLOC(bigarray2,(32*1024,nquarter_mbytes), ier2) 1698 ABI_STAT_MALLOC(bigarray3,(32*1024,nquarter_mbytes), ier3) 1699 ABI_STAT_MALLOC(bigarray4,(32*1024,nquarter_mbytes), ier4) 1700 ABI_STAT_MALLOC(bigarray5,(32*1024,nquarter_mbytes), ier5) 1701 ABI_STAT_MALLOC(bigarray6,(32*1024,nquarter_mbytes), ier6) 1702 ABI_STAT_MALLOC(bigarray7,(32*1024,nquarter_mbytes), ier7) 1703 ABI_STAT_MALLOC(bigarray8,(32*1024,nquarter_mbytes), ier8) 1704 1705 if(ier1/=0 .or. ier2/=0 .or. ier3/=0 .or. ier4/=0 .or. ier5/=0 .or. ier6/=0 .or. ier7/=0 .or. ier8/=0) then 1706 write(msg,'(a,f11.3,a,a,a,a,a,a,a)')& 1707 & 'Test failed to allocate ',mbbiggest,' Mbytes',ch10,& 1708 & 'It is not worth to continue ',ch10,& 1709 & 'Action: modify input variables or submission parameters to fit the available memory,',ch10,& 1710 & 'increase limit on available memory or set mem_test to 0 to disable this test.' 1711 if(option==1)then 1712 ABI_ERROR_CLASS(msg, "MemanaError") 1713 else 1714 ABI_WARNING(msg) 1715 quit=1 1716 end if 1717 end if 1718 1719 if(quit==0)then 1720 write(msg,'(a,f11.3,a,a,a)')& 1721 & ' memana: allocated ',mbbiggest,'Mbytes, for testing purposes. ',ch10,& 1722 & ' The job will continue.' 1723 call wrtout(std_out,msg) 1724 end if 1725 ABI_SFREE(bigarray1) 1726 ABI_SFREE(bigarray2) 1727 ABI_SFREE(bigarray3) 1728 ABI_SFREE(bigarray4) 1729 ABI_SFREE(bigarray5) 1730 ABI_SFREE(bigarray6) 1731 ABI_SFREE(bigarray7) 1732 ABI_SFREE(bigarray8) 1733 1734 end if 1735 1736 !-------------------------------------------------------------------- 1737 1738 if(option==2 .and. quit==1 )then 1739 1740 ! Estimation of the available memory 1741 ! 1742 ! A quarter of Mbyte is 256*1024/8 real(dp) numbers, 1743 ! that is 32*1024 dp numbers. 1744 ! One begins with the allocation of 4 Mbytes. If successful, 1745 ! one increases that number, until the allocation is not successfull 1746 ! any more. Unfortunately, on a P6 with the pghpf compiler, the 1747 ! allocate instruction generate a core dump, instead of returning 1748 ! an error code, so that this part of code has been made optional. 1749 1750 nquarter_mbytes=16 1751 nmbytes=nquarter_mbytes/4.0_dp 1752 1753 ! With an increase ratio of 1.25_dp (see below), ii=5 leads to 9 MB, 1754 ! ii=10 leads to 28 MB, ii=15 leads to 85 MB, ii=18 leads to 165 MB, 1755 ! ii=30 is over 2 GB 1756 do ii=1,30 1757 ABI_STAT_MALLOC(bigarray,(32*1024,nquarter_mbytes), ier) 1758 if(ier/=0)then 1759 write(msg,'(a,i0,a)')' memana : failed to allocate ',nmbytes,' Mbytes' 1760 call wrtout(std_out,msg,'PERS') 1761 exit 1762 end if 1763 write(msg,'(a,i0,a)')' memana : succeeded to allocate ',nmbytes,' Mbytes' 1764 call wrtout(std_out,msg,'PERS') 1765 ! Here really test the space 1766 ! do kk=1,nquarter_mbytes 1767 ! do jj=1,32*1024,37 1768 ! bigarray(jj,kk)=0.0_dp 1769 ! end do 1770 ! write(std_out,*)' memana : wrote ',kk,' quarter of mbytes' 1771 ! end do 1772 ABI_FREE(bigarray) 1773 nquarter_mbytes=dble(nquarter_mbytes)*1.25_dp 1774 nmbytes=nquarter_mbytes/4.0_dp 1775 end do 1776 ABI_SFREE(bigarray) 1777 1778 ABI_ERROR_CLASS("in memana with option==2 .and. quit==1", "MemanaError") 1779 end if ! End the test of the available memory 1780 1781 !-------------------------------------------------------------------- 1782 1783 ABI_FREE(cdpfftf) 1784 ABI_FREE(cdpfft) 1785 ABI_FREE(cdpmpw) 1786 ABI_FREE(cintfftf) 1787 ABI_FREE(cintfft) 1788 ABI_FREE(cintmpw) 1789 ABI_FREE(cdpadd) 1790 ABI_FREE(cintadd) 1791 ABI_FREE(mbdpadd) 1792 ABI_FREE(mbdpfftf) 1793 ABI_FREE(mbdpfft) 1794 ABI_FREE(mbdpmpw) 1795 ABI_FREE(mbintadd) 1796 ABI_FREE(mbintfftf) 1797 ABI_FREE(mbintfft) 1798 ABI_FREE(mbintmpw) 1799 ABI_FREE(mbother) 1800 ABI_FREE(mbtot) 1801 1802 end subroutine memana
m_memeval/memorf [ Functions ]
[ Top ] [ m_memeval ] [ Functions ]
NAME
memorf
FUNCTION
Estimation of the memory needed for a response-function job. According to the value of the option variable, might also try to allocate this amount of memory, and if it fails, might estimate the available memory.
INPUTS
cplex=1 or 2, indicate whether the den and pot functions are real or complex getcell=if non-zero, the values of acell and rprim are taken from the output of another dataset idtset=number of the current dataset intxc=control xc quadrature iout=unit number for output of formatted data. iprcel=govern the choice of preconditioner for the SCF cycle iscf=governs the choice of SCF algorithm, or non-SCF calculation. jdtset=index of the current dataset lmnmax=max. number of (l,m,n) components over all type of psps lnmax =max. number of (l,n) components over all type of psps mband =maximum number of bands mffmem =governs the number of FFT arrays which are fit in core memory mgfft =maximum single fft dimension mkmems=number of k points which can fit in memory; set to 0 if use disk the three values correspond to mkmem, mkqmem and mk1mem mpi_enreg=information about MPI parallelization mpssang is 1+maximum angular momentum for nonlocal pseudopotential mpssoang is 1+maximum (spin*angular momentum) for nonlocal pseudopotential mpw =maximum number of planewaves in basis sphere (large number) mqgrid=maximum dimension of grid of q values for psp representations natom =number of atoms in unit cell nband(nkpt*nsppol)=number of bands at each k point, for each polarization nfft =(effective) number of FFT grid points (for one processor) ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft nkpt =number of k points nloalg(3)=governs the choice of the algorithm for non-local operator. nspden=number of spin-density components nspinor=number of spinorial components of the wavefunctions nsppol=number of channels for spin-polarization (1 or 2) nsym =number of symmetry elements in space group ntypat=number of types of atoms n1xccc=dimension of xccc1d ; 0 if no XC core correction is used occopt=option for occupation numbers. If 3<=occopt<=7, varying occupation optddk=1 if ddk is computed during run optphon=1 if phonons are computed during run option : if 0 , no test of available memory if 1 , the routine tries to allocate the estimated memory, for testing purposes, and if a failure occurs, the routine stops. if 2 , like 1, but before stopping, the routine will provide an estimation of the available memory. optstrs=1 if strain perturbation is computing during run prtvol=control print volume useylm=governs the way the nonlocal operator is to be applied: 1=using Ylm, 0=using Legendre polynomials gpu_option= GPU implementation to use, i.e. cuda, openMP, ... (0=not using GPU) xclevel= level of the XC functional
OUTPUT
(only writing)
NOTES
for the estimation, it is only taken into account those arrays that have some probability of being larger than 1000*8 bytes : - All the arrays that have large numbers as one of their dimensions (mqgrid, mpw, nfft, ngfft(4)*ngfft(5)*ngfft(6),n1xccc or a constant larger than 1000) - All the arrays that have a product of two moderately large numbers (potential size above 30 : mband, mgfft, mkmems, natom, nkpt, nsym, or a constant larger than 30) After this estimation, an amount of (176 + 55 + 6*natom) Kbytes is added to take into account the static arrays declared in rhotoxc and daughter routines (at maximum 22*1000 dp numbers), as well as other arrays like character(len=500) :: msg (present in about 100 routines), or the different arrays allocated in move.f, brdmin.f, gstate.f (xf array) or pspini.f In the case 3<=occopt<=7 this amount is increased by 760 Kbytes to take into account the arrays smdfun, occfun, entfun, workfun and xgrid, declared in getnel The current version takes into account only : 1) and 2) the "main chain" in its two slightly different versions : driver - respfn - dfpt_looppert - dfpt_scfcv - dfpt_vtorho - dfpt_vtowfk - dfpt_cgwf - getghc - fourwf or (nonlop+opernl) Also, it is assumed that the potentials are non-local, even if there are local ! It would be necessary to update this routine now that the beginning of psp files is read before the present call (XG 980502) Some BIG approximations, not present in the GS corresponding routine have been done : nsym=nsym1, nkpt=nkpt_rbz, mpw=mpw1 ...
SOURCE
1901 subroutine memorf(cplex,n1xccc,getcell,idtset,intxc,iout,iprcel,& 1902 & iscf,jdtset,lmnmax,lnmax,mband,mffmem,mgfft,& 1903 & mkmems,mpi_enreg,mpsang,mpssoang,mpw,mqgrid,& 1904 & natom,nband,nfft,ngfft,& 1905 & nkpt,nloalg,nspden,nspinor,nsppol,nsym,ntypat,& 1906 & occopt,optddk,optphon,option,optstrs,prtvol,useylm,gpu_option,xclevel) 1907 1908 !Arguments ------------------------------------ 1909 !scalars 1910 integer,intent(in) :: cplex,getcell,idtset,intxc,iout,iprcel,iscf 1911 integer,intent(in) :: jdtset,lmnmax,lnmax,mband,mffmem,mgfft,mpsang 1912 integer,intent(in) :: mpssoang,mpw,mqgrid,n1xccc,natom,nfft,nkpt 1913 integer,intent(in) :: nspden,nspinor,nsppol,nsym,ntypat,occopt 1914 integer,intent(in) :: optddk,option,optphon,optstrs,prtvol,useylm 1915 integer,intent(in) :: gpu_option,xclevel 1916 type(MPI_type),intent(in) :: mpi_enreg 1917 !arrays 1918 integer,intent(in) :: mkmems(3),nband(nkpt*nsppol),ngfft(18) 1919 integer,intent(in) :: nloalg(3) 1920 1921 !Local variables------------------------------- 1922 !marrays= maximal number of arrays to be monitored (or group of arrays) 1923 !cmpw(marrays)=count of blocks of size mpw bytes 1924 !cfft(marrays)=number of blocks of size nfft bytes 1925 !cadd(marrays)=additional storage needed (in bytes) 1926 !dttyp(marrays)=datatype of the array : 4 for integers, 8 for real(dp) 1927 !nchain= number of different chains of routines 1928 !chain(marrays,nchain)=different chains of routines 1929 !scalars 1930 integer,parameter :: marrays=150,nchain=2 1931 integer :: fftalgb,matblk,maxmkmem,mincat,mk1mem,mkmem,mkqmem,mu,n_fftgr 1932 integer :: narr_fourdp,ngrad,nprocwf 1933 integer :: my_natom 1934 real(dp) :: mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm 1935 character(len=500) :: msg 1936 character(len=1) :: firstchar 1937 !arrays 1938 integer :: dttyp(marrays) 1939 real(dp) :: cadd(marrays),cfft(marrays),cmpw(marrays) 1940 real(dp),allocatable :: cfft_dum(:) 1941 logical :: chain(marrays,nchain) 1942 1943 ! ************************************************************************** 1944 1945 if(option<0 .or. option>2)then 1946 write(msg, '(a,i0,a)')'option= ',option,' while the only allowed values are 0, 1, or 2.' 1947 ABI_BUG(msg) 1948 end if 1949 1950 firstchar=' ';if (gpu_option/=ABI_GPU_DISABLED) firstchar='_' 1951 cmpw(:)=zero ; cfft(:)=zero ; cadd(:)=zero 1952 dttyp(:)=0 1953 1954 call wrtout(std_out,' memorf : analysis of memory needs ') 1955 1956 if(jdtset>=100)then 1957 write(msg,'(80a,a,a,i5,a)')('=',mu=1,80),ch10,& 1958 ' Values of the parameters that define the memory need for DATASET',jdtset,' (RF).' 1959 else if(jdtset/=0)then 1960 write(msg,'(80a,a,a,i3,a)')('=',mu=1,80),ch10,& 1961 ' Values of the parameters that define the memory need for DATASET',jdtset,' (RF).' 1962 else 1963 write(msg,'(80a,a,a,a)')('=',mu=1,80),ch10,& 1964 ' Values of the parameters that define the memory need of the present run',' (RF).' 1965 end if 1966 call wrtout(iout,msg) 1967 call wrtout(std_out,msg) 1968 1969 mkmem=mkmems(1) 1970 mkqmem=mkmems(2) 1971 mk1mem=mkmems(3) 1972 my_natom=natom;if (mpi_enreg%nproc_atom>1) my_natom=mpi_enreg%my_natom 1973 1974 write(msg,'( 4(a,i8),a,4(a,i8) )' ) & 1975 & ' intxc =',intxc ,' iscf =',iscf,& 1976 & ' lmnmax =',lmnmax ,' lnmax =',lnmax,ch10,& 1977 & ' mgfft =',mgfft,' mpssoang =',mpssoang,& 1978 & ' mqgrid =',mqgrid,' natom =',natom 1979 call wrtout(iout,msg) 1980 call wrtout(std_out,msg) 1981 1982 write(msg,'( 4(a,i8),a,4(a,i8),a,4(a,i8) )' ) & 1983 & ' nloc_mem =',nloalg(2)*(nloalg(3)+1),' nspden =',nspden ,& 1984 & ' nspinor =',nspinor,' nsppol =',nsppol ,ch10,& 1985 & ' nsym =',nsym,' n1xccc =',n1xccc ,& 1986 & ' ntypat =',ntypat,' occopt =',occopt ,ch10,& 1987 & ' xclevel =',xclevel 1988 call wrtout(iout,msg) 1989 call wrtout(std_out,msg) 1990 1991 write(msg,'(4(3(a,i12),a))') & 1992 & '- mband =',mband ,' mffmem =',mffmem,& 1993 & ' mkmem =',mkmem ,ch10,& 1994 & '- mkqmem =',mkqmem ,' mk1mem =',mk1mem,& 1995 & ' mpw =',mpw ,ch10,& 1996 & ' nfft =',nfft ,' nkpt =',nkpt 1997 call wrtout(iout,msg) 1998 call wrtout(std_out,msg) 1999 2000 if (my_natom/=natom)then 2001 write(msg,'(a,i10)') 'Pmy_natom=',my_natom 2002 call wrtout(iout,msg) 2003 call wrtout(std_out,msg) 2004 end if 2005 2006 write(msg,'(80a)') ('=',mu=1,80) 2007 call wrtout(iout,msg) 2008 call wrtout(std_out,msg) 2009 2010 if(getcell>0 .or. (getcell<0 .and. idtset+getcell>0) )then 2011 write(msg,'(a,a,a,a,a,a,i3,a,i3,a,a,a,a,a,a)' )ch10,& 2012 & ' memorf : COMMENT -',ch10,& 2013 & ' The determination of memory needs at this stage is meaningless,',ch10,& 2014 & ' since getcell = ',getcell,' is non-zero, while idtset=',idtset,'.',ch10,& 2015 & ' The following numbers are obtained by supposing that acell and rprim',ch10,& 2016 & ' are NOT taken from a previous dataset. You cannot rely on them.',ch10 2017 call wrtout(iout,msg) 2018 call wrtout(std_out,msg) 2019 end if 2020 2021 n_fftgr=1 2022 if(iscf==1) n_fftgr=5 2023 if(iscf==2.or.iscf==3) n_fftgr=4 2024 if(iscf==5.or.iscf==6) n_fftgr=10 2025 2026 !work1 and work2 in fourdp : take into account approximately fftalgb 2027 fftalgb=mod(ngfft(7),100)/10 2028 if(fftalgb==0)narr_fourdp=2*2 2029 if(fftalgb==1)narr_fourdp=2 2030 2031 ngrad=1 2032 if(xclevel==2)ngrad=2 2033 2034 !(0) in main, driver, and respfn ------------------- 2035 !indsym (respfn) 2036 cadd(1)=4*nsym*natom ; dttyp(1)=4 2037 !rhor,rhog (respfn) 2038 cfft(2)=nspden+2 ; dttyp(2)=8 2039 !occ (driver), doccde (respfn) 2040 cadd(3)=2*mband*nkpt*nsppol ; dttyp(3)=8 2041 !qgrid,vlspl,ffspl (driver) 2042 cadd(4)=mqgrid*(1+2*ntypat*(1+lnmax)) & 2043 & ; dttyp(4)=8 2044 !xccc1d (driver) 2045 cadd(5)=n1xccc*6*ntypat ; dttyp(5)=8 2046 !vtrial (respfn) 2047 cfft(6)=nspden ; dttyp(6)=8 2048 !kxc (respfn) 2049 cfft(7)=2*nspden-1 ; dttyp(7)=8 2050 2051 !(1-2) in dfpt_looppert -------------------------------------- 2052 !ph1d 2053 cadd(11)=2*3*(2*mgfft+1)*natom ; dttyp(11)=8 2054 !vpsp1 2055 cfft(12)=cplex ; dttyp(12)=8 2056 !indsy1 assume that nsym=nsym1 2057 cadd(13)=4*nsym*natom ; dttyp(13)=4 2058 !irrzonr1 and phnons1 assume that nsym=nsym1 2059 if(nsym/=1)then 2060 cfft(14)=(2+(nspden/4))*((nspden/nsppol)-3*nspden/3) ; dttyp(14)=4 2061 cfft(15)=2*((nspden/nsppol)-3*nspden/3) ; dttyp(15)=8 2062 end if 2063 !doccde_rbz, eigen0, eigenq, occ_rbz, docckqde, occkq, resid 2064 !assume than nkpt=nkpt_rbz 2065 cadd(16)=7*mband*nkpt*nsppol ; dttyp(16)=8 2066 !kg 2067 cmpw(18)=3*mkmem ; dttyp(18)=4 2068 !cg 2069 cmpw(19)=2*nspinor*mband*mkmem*nsppol ; dttyp(19)=8 2070 !kg1 2071 cmpw(21)=3*mk1mem ; dttyp(21)=4 2072 !cgq 2073 cmpw(22)=2*nspinor*mband*mkqmem*nsppol ; dttyp(22)=8 2074 !cg1 2075 cmpw(23)=2*nspinor*mband*mk1mem*nsppol ; dttyp(23)=8 2076 !rhor1,rhog1 2077 cfft(24)=cplex*nspden+2 ; dttyp(24)=8 2078 !eigen1 2079 !assume than nkpt=nkpt_rbz 2080 cadd(25)=2*mband*mband*nkpt*nsppol ; dttyp(25)=8 2081 !ylm 2082 cmpw(26)=mkmem*mpsang*mpsang*useylm ; dttyp(26)=8 2083 2084 !(3) in dfpt_scfcv -------------------------------------- 2085 2086 !vhartr1,vtrial1,vxc 2087 cfft(31)=cplex+cplex*nspden+nspden ; dttyp(31)=8 2088 if(iscf>0)then 2089 ! f_fftgr 2090 cfft(32)=cplex*nspden*n_fftgr*mffmem ; dttyp(32)=8 2091 end if 2092 2093 !(4) in dfpt_vtorho---------------------------------------- 2094 2095 !proc_distrb 2096 cadd(41)=nkpt*mband*nsppol ; dttyp(41)=4 2097 !kg_k,kg1_k 2098 cmpw(42)=6 ; dttyp(42)=4 2099 !rhoaug1, vlocal, vlocal1 2100 cfft(43)=2*cplex+1 ; dttyp(43)=8 2101 cadd(43)=(2*cplex+1)*(ngfft(4)*ngfft(5)*ngfft(6)-nfft) 2102 2103 if(mkqmem==0)then 2104 ! cgq_disk 2105 cmpw(45)=2*nspinor*mband ; dttyp(45)=8 2106 end if 2107 !doccde_k,doccde_kq,eig0_k, ..., eig1_k, rocceig 2108 cadd(47)=(14+3*mband)*mband ; dttyp(47)=8 2109 !ylm_k,ylm1_k 2110 cmpw(49)=2*mpsang*mpsang*useylm ; dttyp(49)=8 2111 2112 !(5) in dfpt_vtowfk -------------------------------------- 2113 2114 !dkinpw,kinpw1 2115 cmpw(51)=2 ; dttyp(51)=8 2116 !ffnlk,ffnl1,ffnlkq 2117 cmpw(52)=2*(ntypat+2)*lmnmax ; dttyp(52)=8 2118 !ghc,gvnlxc,gvnlx1 2119 cmpw(53)=6*nspinor ; dttyp(53)=8 2120 !ph3d 2121 matblk=NLO_MINCAT 2122 if(nloalg(2)<=0)matblk=natom 2123 cmpw(54)=2*matblk ; dttyp(54)=8 2124 !wfraug,wfraug1,rhoaug 2125 cfft(55)=5 ; dttyp(55)=8 2126 cadd(55)=5*(ngfft(4)*ngfft(5)*ngfft(6)-nfft) 2127 !cwavef,cwave0,cwave1 2128 cmpw(56)=6*nspinor ; dttyp(56)=8 2129 2130 !(6) in dfpt_cgwf ---------------------------------------- 2131 2132 !gh1, gh_direc, gvnlx_direc, conjgr, direc, vresid, cwaveq 2133 cmpw(61)=14*nspinor ; dttyp(61)=8 2134 2135 !(9a) in getghc and fourwf---------------------------- 2136 2137 !work (in getghc) 2138 cfft(91)=2 ; dttyp(91)=8 2139 cadd(92)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft) 2140 !work1 (in fourwf) 2141 cfft(92)=2 ; dttyp(92)=8 2142 cadd(92)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft) 2143 2144 !(9b) in getghc, nonlop and opernl-------------------- 2145 mincat=min(NLO_MINCAT,natom-ntypat+1) 2146 if (useylm==0) then ! ===== nonlop_pl 2147 ! gxa (in nonlop) 2148 cadd(94)=2*20*mincat*2 ; dttyp(94)=8 2149 ! dgxdt (in nonlop) 2150 cadd(95)=2*3*20*mincat*2 ; dttyp(95)=8 2151 ! dgxds (in nonlop) 2152 cadd(96)=2*56*mincat*2 ; dttyp(96)=8 2153 ! teffv (in opernl4 - no distinction is made for opernl, opernl2 or opernl3) 2154 ! kpgx, ffkg 2155 ! here, evaluate an upper value, with nproj=2, p,d and f orbitals, but not 2156 ! considering the stress, since it will be called outside of the main chain 2157 cadd(97)=NLO_MBLKPW*40 ; dttyp(97)=8 2158 ! kpg if nloalg(3)=1 2159 cadd(98)=3*mpw*nloalg(3) ; dttyp(98)=8 2160 else ! ===== nonlop_ylm 2161 ! gx + gxfac 2162 cadd(94)=2* 2*mpw*lmnmax*mincat ; dttyp(94)=8 2163 ! dgxdt + dgxdtfac + d2gxdt 2164 if (optddk>0.and.optphon==0.and.optstrs==0) cadd(95)=2*2*mpw*lmnmax*mincat 2165 if (optphon>0) cadd(95)=12*2*mpw*lmnmax*mincat 2166 if (optstrs>0) cadd(95)=72*2*mpw*lmnmax*mincat 2167 dttyp(95)=8 2168 ! kpg 2169 cadd(96)=2*3*mpw ; dttyp(96)=8 2170 if (optphon>0) cadd(96)=cadd(96)+2*6*mpw 2171 ! miscelaneous: indlmn_typ, ffnl_typ 2172 cadd(97)=lmnmax*(6+mpw*(2+optstrs)); dttyp(97)=8 2173 ! opernla_ylm: scalar,scali,scalarr,scalari 2174 cadd(98)=2*mpw+2*mpw 2175 if (optddk>0.and.optstrs==0) cadd(98)=cadd(98)+2*mpw 2176 if (optstrs>0) cadd(98)=cadd(98)+9*2*mpw 2177 dttyp(98)=8 2178 end if 2179 2180 !-------------------------------------------------------------------------- 2181 2182 chain(:,:)=.true. 2183 2184 !Define the main chain version a (fourwf) 2185 chain(93:100,1)=.false. 2186 2187 !Define the main chain version b (nonlop+opernl) 2188 chain(91:92,2)=.false. 2189 2190 !The memory needed for each chain has been computed 2191 !------------------------------------------------------------------------- 2192 !Still need some auxiliary data : estimate the disk space 2193 !or the maximum segment size. 2194 2195 !XG030513 : MPIWF need to multiply mbdiskwf by the number of processors 2196 !in the WF group. For the time being, nprocwf=1 2197 nprocwf=mpi_enreg%nproc_fft 2198 2199 mbdiskwf=(8*2*mpw*nprocwf*sum(nband(1:nkpt*nsppol)))/1024._dp**2 + 0.002_dp 2200 mbdiskpd=(8*nfft*nsppol)/1024._dp**2 + 0.002_dp 2201 2202 !Determine the largest array out of cg,cg1,cgq, cg_disk or f_fftgr (f_fftgr_disk) 2203 if(mkmem==0 .and. mk1mem==0 .and. mkqmem==0)then 2204 mbcg=(8*2*mpw*nspinor*mband)/1024._dp**2 + 0.002_dp 2205 else 2206 maxmkmem=maxval(mkmems(:)) 2207 mbcg=(8*2*mpw*nspinor*mband*maxmkmem*nsppol)/1024._dp**2 + 0.002_dp 2208 end if 2209 if(mffmem==0)then 2210 mbf_fftgr=(8*cplex*nfft*n_fftgr)/1024._dp**2 + 0.002_dp 2211 else 2212 mbf_fftgr=(8*cplex*nfft*n_fftgr*nspden*mffmem)/1024._dp**2 + 0.002_dp 2213 end if 2214 2215 !--------------------------------------------------------------------- 2216 !Now, analyze the data 2217 2218 !DEBUG 2219 !write(std_out,*)' memorf : nchain=',nchain 2220 !ENDDEBUG 2221 2222 ABI_MALLOC(cfft_dum,(marrays)) 2223 cfft_dum=zero 2224 mbgylm=zero 2225 call memana(cadd,cfft,cfft_dum,chain,cmpw,dttyp,iout,iprcel,iscf,& 2226 & marrays,mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm,mffmem,& 2227 & mpw,natom,nchain,nfft,nfft,occopt,option,prtvol) 2228 ABI_FREE(cfft_dum) 2229 2230 end subroutine memorf
m_memeval/memory [ Functions ]
[ Top ] [ m_memeval ] [ Functions ]
NAME
memory
FUNCTION
Estimation of the memory needed for a ground-state job. According to the value of the option variable, might also try to allocate this amount of memory, and if it fails, might estimate the available memory.
INPUTS
extrapwf=flag controlling the extrapolation of wave functions during MD or relaxation getcell=if non-zero, the values of acell and rprim are taken from the output of another dataset idtset=number of the current dataset icoulomb=0 for periodic Fourier calculation of Hartree potential; 1 for isolated system using Poisson solver. intxc=control xc quadrature ionmov=control force calculations iout=unit number for output of formatted data. densfor_pred=govern the choice of density prediction and/or forces correction iprcel=govern the choice of preconditioner for the SCF cycle iscf=governs the choice of SCF algorithm, or non-SCF calculation. jdtset=index of the current dataset lmnmax=max. number of (l,m,n) components over all type of psps lnmax =max. number of (l,n) components over all type of psps mband =maximum number of bands mffmem =governs the number of FFT arrays which are fit in core memory mgfftf =maximum single fft dimension (fine grid, if PAW) mgfft =maximum single fft dimension (coarse grid, if PAW) mgfftdiel =maximum single fft dimension for susceptibility and dielectric matrices. mkmem =maximum number of k points which can fit in core memory mpi_enreg=information about MPI parallelization mpssang is 1+maximum angular momentum for nonlocal pseudopotential mpssoang is 1+maximum (spin*angular momentum) for nonlocal pseudopotential mpw =maximum number of planewaves in basis sphere (large number) mqgrid_ff=dimension of q (or G) grid for nl form factors (array ffspl) mqgrid_vl=dimension of q (or G) grid for Vloc (array vlspl) natom =number of atoms in unit cell nband(nkpt*nsppol)=number of bands at each k point, for each polarization nfftf =number of fft grid points for density (fine grid, if PAW) nfft =number of fft grid points for wavefunctions (coarse grid, if PAW) nfftdiel =maximum number of fft grid points for susceptibility and dielectric matrices ngfftf(18)=contain all needed information about 3D FFT (fine grid, if PAW) ngfft(18) =contain all needed information about 3D FFT (coarse grid, if PAW) ngfftdiel(18)=contain all needed information about 3D FFT, dielectric case, see ~abinit/doc/variables/vargs.htm#ngfft for susceptibility and dielectric matrices nimage=number of images (replicas) of the cell nkpt =number of k points npsp=number of different pseudopotentials npwdiel=number of plane wave for susceptibility and dielectric matrix npulayit=number of iterations used in Pulay SCF mixing nloalg(3)=governs the choice of the algorithm for non-local operator. nspden=number of spin-density components nspinor=number of spinorial components of the wavefunctions nsppol=number of channels for spin-polarization (1 or 2) nsym =number of symmetry elements in space group ntypat =number of types of atoms n1xccc=dimension of xccc1d ; 0 if no XC core correction is used occopt=option for occupation numbers. If 3<=occopt<=8, varying occupation optforces=1 if forces are computed during run option : if 0 , no test of available memory if 1 , the routine tries to allocate the estimated memory, for testing purposes, and if a failure occurs, the routine stops. if 2 , like 1, but before stopping, the routine will provide an estimation of the available memory. optstress=1 if stresses are computed during run pawcpxocc=2 if PAW occupancies (rhoij) are complex pawmixdg=1 if mixing (in PAW) is done on the fine grid pawnhatxc=1 if nhat PAW density has to be analytically included in XC pawspnorb=1 when spin-orbit is activated within PAW pawstgylm=1 if g_l(r).Y_lm(r) factors are stored in memory (PAW) prtvol=control print volume pspheads(npsp)=<type pspheader_type>all the important information from the header tfkinfun=flag controling the use of Thomas-Fermi algorithme (without WF) typat(natom)=type of each atom ucvol= unit cell volume usepaw= 0 for non paw calculation; =1 for paw calculation useylm=governs the way the nonlocal operator is to be applied: 1=using Ylm, 0=using Legendre polynomials gpu_option= GPU implementation to use, i.e. cuda, openMP, ... (0=not using GPU) xclevel=XC functional level
OUTPUT
(only writing)
NOTES
for the estimation, it is only taken into account those arrays that have some probability of being larger than 1000*8 bytes : - All the arrays that have large numbers as one of their dimensions (mqgrid, mpw, nfft, ngfft(4)*ngfft(5)*ngfft(6), ngfftdiel(4)*ngfftdiel(5)*ngfftdiel(6), n1xccc or a constant larger than 1000) - All the arrays that have a product of two moderately large numbers (potential size above 30 : mband, mgfft, mkmem, natom, nkpt, nsym, or a constant larger than 30) After this estimation, an amount of (176 + 55 + 6*natom) Kbytes is added to take into account the static arrays declared in rhotoxc and daughter routines (at maximum 22*1000 dp numbers), as well as other arrays like character(len=500) :: message (present in about 100 routines), or the different arrays allocated in move.f, brdmin.f, gstate.f (xf array) or pspini.f In the case 3<=occopt<=8 this amount is increased by 760 Kbytes to take into account the arrays smdfun, occfun, entfun, workfun and xgrid, declared in getnel. The current version takes into account 1) and 2) the "main chain" in its two slightly different versions : driver - gstate - (move or brdmin) - scfcv - vtorho - vtowfk - 3) the xc chain : driver - gstate - (move or brdmin) - scfcv - (vresfo) - rhotoxc - xcden 4) the mkrho chain : driver - gstate - (move or brdmin) - scfcv - vtorho - mkrho 5) the fourdp chain : driver - gstate - (move or brdmin) - scfcv - vtorho ( + ftofr - fourdp - symrhg ) 6) the parallel k-point chain : driver - gstate - (move or brdmin) - scfcv - vtorho - MPI_ALLREDUCE 7) the newvtr chain : driver - gstate - (move or brdmin) - scfcv - newvtr 8) the susceptibility chain : driver - gstate - (move or brdmin) - scfcv - vtorho - suscep - suskmm 9) the dielectric chain : driver - gstate - (move or brdmin) - scfcv - vtorho - dielmt 10) the tddft chain : driver - gstate - (move or brdmin) - scfcv - vtorho - tddft It is valid for all values of iscf, but not for nstep=0 (when the chain goes through energy instead of vtorho). Also, it is assumed that the potentials are non-local, even if there are local ! It would be necessary to update this routine now that the beginning of psp files is read before the present call (XG 980502) One might also estimate if there must be a chain arriving at: strnps , mkffnl, mkcore, mklocl, mkrho, prcpot, irrzg, initro, clnup1. This is because there are allocated arrays in these routines.
SOURCE
473 subroutine memory(n1xccc,extrapwf,getcell,idtset,icoulomb,intxc,ionmov,iout,densfor_pred,iprcel,& 474 & iscf,jdtset,lmnmax,lnmax,& 475 & mband,mffmem,mgfft,mgfftdiel,mgfftf,mkmem,mpi_enreg,mpsang,mpssoang,mpw,mqgrid_ff,mqgrid_vl,& 476 & natom,nband,nfft,nfftdiel,nfftf,ngfft,ngfftdiel,ngfftf,nimage,& 477 & nkpt,nloalg,npsp,npulayit,npwdiel,nspden,nspinor,nsppol,nsym,ntypat,& 478 & occopt,optforces,option,optstress,pawcpxocc,pawmixdg,pawnhatxc,pawspnorb,pawstgylm,& 479 & prtvol,pspheads,qphon,tfkinfunc,typat,ucvol,usepaw,useylm,gpu_option,xclevel) 480 481 !Arguments ------------------------------------ 482 !scalars 483 integer,intent(in) :: extrapwf,getcell,icoulomb,idtset,intxc,ionmov,iout,densfor_pred 484 integer,intent(in) :: iprcel,iscf,jdtset,lmnmax,lnmax,mband,mffmem,mgfft 485 integer,intent(in) :: mgfftdiel,mgfftf,mkmem,mpsang,mpssoang,mpw,mqgrid_ff 486 integer,intent(in) :: mqgrid_vl,n1xccc,natom,nfft,nfftdiel,nfftf,nimage,nkpt,npsp 487 integer,intent(in) :: npulayit,npwdiel,nspden,nspinor,nsppol,nsym,ntypat 488 integer,intent(in) :: occopt,optforces,option,optstress 489 integer,intent(in) :: pawcpxocc,pawmixdg,pawnhatxc,pawspnorb,pawstgylm 490 integer,intent(in) :: prtvol,tfkinfunc,usepaw,useylm,gpu_option,xclevel 491 real(dp) :: ucvol 492 type(MPI_type),intent(in) :: mpi_enreg 493 !arrays 494 integer,intent(in) :: nband(nkpt*nsppol),ngfft(18),ngfftdiel(18),ngfftf(18) 495 integer,intent(in) :: nloalg(3),typat(natom) 496 real(dp),intent(in) :: qphon(3) 497 type(pspheader_type) :: pspheads(npsp) 498 499 !Local variables------------------------------- 500 !marrays=maximal number of arrays to be monitored (or group of arrays) 501 !cmpw(marrays)=count of blocks of size mpw bytes 502 !cfft(marrays) =count of blocks of size nfft bytes (coarse grid, if PAW) 503 !cfftf(marrays)=count of blocks of size nfft bytes (fine grid, if PAW) 504 !cadd(marrays)=count of additional storage needed (in bytes) 505 !dttyp(marrays)=datatype of the array : 4 for integers, 8 for real(dp) 506 !nchain=number of different chains of routines 507 !chain(marrays,nchain)=different chains of routines 508 ! The cfoo arrays are used to store the allocated memory in the different 509 ! routines of the program. Each stack of the program can allocate some 510 ! memory and the amount is estimated and stored in cfoo(i). The lower i, 511 ! the higher routine. cfft is memory used by FFT handling, cmpw for 512 ! plane waves storage and cadd for miscellaneous memory occupation. 513 ! The unit is the multiplier of the size of nfft for cfft, the multiplier 514 ! of mpw for cmpw and the actually allocated memory for cadd. 515 ! This array stores the size of each chunk of memory (8 for double 516 ! floating point precision, 4 for integers and so on). 517 ! This array defines if the chain defined above allocate or not the 518 ! memory (depending on options). 519 !scalars 520 integer,parameter :: marrays=150,nchain=10 521 integer :: fftalgb,histsz,ii,iscf10,jj,l_max,l_size_max,matblk,mblk,mincat,mu 522 integer :: my_natom,n_fftgr,narr_fourdp,nbnd_in_blk,ndiel4,ndiel456,ndiel5,ndiel6 523 integer :: ngrad,nprocwf,nspgrad,qphase_rhoij,rhoij_nspden 524 real(dp) :: mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm 525 character(len=500) :: msg 526 ! character(len=1) :: firstchar 527 !arrays 528 integer :: dttyp(marrays),nattyp(ntypat) 529 integer,allocatable :: basis_size(:),l_size(:),lmn2_size(:),lmn_size(:) 530 integer,allocatable :: mesh_size(:),my_nattyp(:),pawver(:),shape_type(:) 531 real(dp) :: cadd(marrays),cfft(marrays),cfftf(marrays),cmpw(marrays) 532 real(dp),allocatable :: rshp(:) 533 logical :: chain(marrays,nchain) 534 535 ! ************************************************************************** 536 537 if(option<0 .or. option>2)then 538 write(msg,'(A,I0,A)')'option=',option,' while the only allowed values are 0, 1, or 2.' 539 ABI_BUG(msg) 540 end if 541 542 !firstchar=' ';if (gpu_option/=0) firstchar='_' 543 cmpw(:)=zero ; cfft(:)=zero ; cfftf(:)=zero ; cadd(:)=zero 544 dttyp(:)=0 545 546 my_natom=natom;if (mpi_enreg%nproc_atom>1) my_natom=mpi_enreg%my_natom 547 548 call wrtout(std_out,'memory: analysis of memory needs ') 549 550 if(jdtset>=100)then 551 write(msg,'(80a,a,a,i5,a)')('=',mu=1,80),ch10,& 552 ' Values of the parameters that define the memory need for DATASET',jdtset,'.' 553 else if(jdtset/=0)then 554 write(msg,'(80a,a,a,i3,a)')('=',mu=1,80),ch10,& 555 ' Values of the parameters that define the memory need for DATASET',jdtset,'.' 556 else 557 write(msg,'(80a,a,a)')('=',mu=1,80),ch10,& 558 ' Values of the parameters that define the memory need of the present run ' 559 end if 560 call wrtout(iout,msg) 561 call wrtout(std_out,msg) 562 563 write(msg,'( 4(a,i8),a,4(a,i8) )' ) & 564 & ' intxc =',intxc ,' ionmov =',ionmov,& 565 & ' iscf =',iscf ,' lmnmax =',lmnmax,ch10,& 566 & ' lnmax =',lnmax ,' mgfft =',mgfft,& 567 & ' mpssoang =',mpssoang,' mqgrid =',mqgrid_vl 568 call wrtout(iout,msg) 569 call wrtout(std_out,msg) 570 571 write(msg,'( 4(a,i8),a,4(a,i8),a,4(a,i8) )' ) & 572 & ' natom =',natom ,' nloc_mem =',nloalg(2)*(nloalg(3)+1),& 573 & ' nspden =',nspden ,' nspinor =',nspinor,ch10,& 574 & ' nsppol =',nsppol ,' nsym =',nsym,& 575 & ' n1xccc =',n1xccc ,' ntypat =',ntypat,ch10,& 576 & ' occopt =',occopt ,' xclevel =',xclevel 577 call wrtout(iout,msg) 578 call wrtout(std_out,msg) 579 580 write(msg,'(4(3(a,i12),a))') & 581 & '- mband =',mband ,' mffmem =',mffmem,& 582 & ' mkmem =',mkmem ,ch10,& 583 & ' mpw =',mpw ,' nfft =',nfft ,& 584 & ' nkpt =',nkpt 585 call wrtout(iout,msg) 586 call wrtout(std_out,msg) 587 588 if (my_natom/=natom)then 589 write(msg,'(a,i10)') 'Pmy_natom=',my_natom 590 call wrtout(iout,msg) 591 call wrtout(std_out,msg) 592 end if 593 594 !Additional information if imgmov is activated (use of replicas of the cell) 595 if (nimage>1) then 596 write(msg,'(1(a,i10))' ) ' nimage =',nimage 597 call wrtout(iout,msg) 598 call wrtout(std_out,msg) 599 end if 600 601 !Additional information on FFT grids if PAW 602 if (usepaw==1) then 603 write(msg, '(a,a,a,i10,a,i10)' )& 604 & ' PAW method is used; the additional fine FFT grid is defined by:',ch10,& 605 & ' mgfftf=',mgfftf,' nfftf =',nfftf 606 call wrtout(iout,msg) 607 call wrtout(std_out,msg) 608 end if 609 610 !Additional information if GPU 611 if (gpu_option/=ABI_GPU_DISABLED) then 612 ! write(msg, '(a)' )' GPU method is used' 613 ! call wrtout(iout,msg) 614 ! call wrtout(std_out,msg) 615 end if 616 617 !Additional information needed for the susceptibility and dielectric matrices 618 if((modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70) .or. iscf==-1)then 619 620 ! Compute the number of bands in blocks (nbnd_in_blk) from mband (see suskmm.f) 621 ! Consider that if the number of bands is large, there are at most 8 blocks 622 if(mband>=48)then 623 mblk=8 624 nbnd_in_blk=(mband-1)/mblk+1 625 ! If the number of bands is medium, place 6 bands per block 626 else if(mband>=12)then 627 nbnd_in_blk=6 628 ! Otherwise, must have at least 2 blocks 629 else 630 mblk=2 631 nbnd_in_blk=(mband-1)/mblk+1 632 end if 633 634 write(msg, '(a,a,a,i10,a,i6,a,i10,a,i10)' )& 635 & ' For the susceptibility and dielectric matrices, or tddft :',ch10,& 636 & ' mgfft =',mgfftdiel,' nbnd_in_blk=',nbnd_in_blk,' nfft =',nfftdiel,& 637 & ' npw =',npwdiel 638 call wrtout(iout,msg) 639 call wrtout(std_out,msg) 640 ndiel4=ngfftdiel(4) ; ndiel5=ngfftdiel(5) ; ndiel6=ngfftdiel(6) 641 ndiel456=ndiel4*ndiel5*ndiel6 642 else 643 ! To be sure of initialisation. 644 ndiel456 = 1 645 end if 646 647 write(msg,'(80a)') ('=',mu=1,80) 648 call wrtout(iout,msg) 649 call wrtout(std_out,msg) 650 651 if(getcell>0 .or. (getcell<0 .and. idtset+getcell>0) )then 652 write(msg,'(a,a,a,a,a,a,i3,a,i3,a,a,a,a,a,a)' )ch10,& 653 & ' memory : COMMENT -',ch10,& 654 & ' The determination of memory needs at this stage is meaningless,',ch10,& 655 & ' since getcell = ',getcell,' is non-zero, while idtset=',idtset,'.',ch10,& 656 & ' The following numbers are obtained by supposing that acell and rprim',ch10,& 657 & ' are NOT taken from a previous dataset. You cannot rely on them.',ch10 658 call wrtout(iout,msg) 659 call wrtout(std_out,msg) 660 end if 661 662 !Compute number of atoms per type for current proc 663 nattyp(:)=0 664 do ii=1,natom 665 nattyp(typat(ii))=nattyp(typat(ii))+1 666 end do 667 668 !PAW: store useful dims 669 if (usepaw==1) then 670 ABI_MALLOC(basis_size,(npsp)) 671 ABI_MALLOC(l_size,(npsp)) 672 ABI_MALLOC(lmn_size,(npsp)) 673 ABI_MALLOC(lmn2_size,(npsp)) 674 ABI_MALLOC(mesh_size,(npsp)) 675 ABI_MALLOC(shape_type,(npsp)) 676 ABI_MALLOC(pawver,(npsp)) 677 ABI_MALLOC(rshp,(npsp)) 678 do ii=1,npsp 679 basis_size(ii)=pspheads(ii)%pawheader%basis_size 680 mesh_size(ii)=pspheads(ii)%pawheader%mesh_size 681 l_size(ii)=pspheads(ii)%pawheader%l_size 682 lmn_size(ii)=pspheads(ii)%pawheader%lmn_size 683 lmn2_size(ii)=lmn_size(ii)*(lmn_size(ii)+1)/2 684 pawver(ii)=pspheads(ii)%pawheader%pawver 685 rshp(ii)=pspheads(ii)%pawheader%rshp 686 shape_type(ii)=pspheads(ii)%pawheader%shape_type 687 end do 688 l_max=maxval(pspheads(:)%lmax) 689 l_size_max=maxval(pspheads(:)%pawheader%l_size) 690 rhoij_nspden=nspden;if (pawspnorb>0) rhoij_nspden=4 691 ABI_MALLOC(my_nattyp,(ntypat)) 692 if ((mpi_enreg%nproc_atom<=1).or.(.not.associated(mpi_enreg%my_atmtab))) then 693 my_nattyp=nattyp 694 else 695 my_nattyp=0 696 do ii=1,my_natom 697 jj=typat(mpi_enreg%my_atmtab(ii)) 698 my_nattyp(jj)=my_nattyp(jj)+1 699 end do 700 end if 701 qphase_rhoij=merge(2,1,any(qphon(:)>tol8)) 702 else 703 ! Do the allocation to avoid uninitialised variables. 704 ABI_MALLOC(my_nattyp,(1)) 705 ABI_MALLOC(basis_size,(1)) 706 ABI_MALLOC(l_size,(1)) 707 ABI_MALLOC(lmn_size,(1)) 708 ABI_MALLOC(lmn2_size,(1)) 709 ABI_MALLOC(mesh_size,(1)) 710 ABI_MALLOC(shape_type,(1)) 711 ABI_MALLOC(pawver,(1)) 712 ABI_MALLOC(rshp,(1)) 713 rhoij_nspden=nspden 714 l_size_max=1 715 l_max=1 716 qphase_rhoij=1 717 end if 718 719 n_fftgr=1;iscf10=mod(iscf,10) 720 if(iscf10==1) n_fftgr=5 721 if(iscf10==2) n_fftgr=3 722 if(iscf10==3) n_fftgr=4 723 if(iscf10==4) n_fftgr=6 724 if(iscf10==5.or.iscf10==6) n_fftgr=10 725 if(iscf10==7) n_fftgr=2+2*npulayit 726 727 !work1 and work2 in fourdp : take into account approximately fftalgb 728 fftalgb=mod(ngfft(7),100)/10 729 if(fftalgb==0)narr_fourdp=2*2 730 if(fftalgb==1)narr_fourdp=2 731 732 ngrad=1;if(xclevel==2.or.tfkinfunc>10)ngrad=2 733 734 !(1) in main, driver, gstate and brdmin ---------------- 735 !in move, nothing interesting is allocated. 736 !kg (gstate) 737 cmpw(1)=3*mkmem ; dttyp(1)=4 738 !indsym (gstate) 739 cadd(3)=4*nsym*natom ; dttyp(3)=4 740 !irrzon (gstate) 741 if(nsym/=1)then 742 cfft(4)=2*((nspden/nsppol)-3*(nspden/4)) ; dttyp(4)=4 743 end if 744 !ylm (gstate) 745 cmpw(5)=mkmem*mpsang*mpsang*useylm ; dttyp(5)=8 746 ! 747 !rhor,rhog (gstate) 748 cfftf(5)=nspden+2 ; dttyp(5)=8 749 !cg (gstate) 750 cmpw(6)=2*nspinor*mband*mkmem*nsppol ; dttyp(6)=8 751 !eigen,resid,occ (occ is initialized in abinit, and not in driver) 752 cadd(7)=3*mband*nkpt*nsppol ; dttyp(7)=8 753 !qgrid_vl,qgrid_ff,vlspl,ffspl 754 cadd(8)=mqgrid_vl*(1+2*ntypat) & 755 & +mqgrid_ff*(1+2*ntypat*lnmax) & 756 & ; dttyp(8)=8 757 !ph1d (actually allocated in scfcv !!) 758 cadd(9)=2*3*(2*mgfft+1)*natom ; dttyp(9)=8 759 cadd(9)=cadd(9)+2*3*(2*mgfftf+1)*natom*usepaw !Additional ph1df for PAW 760 !phnons (in gstate) 761 if(nsym/=1)then 762 cfft(10)=2*((nspden/nsppol)-3*(nspden/4)) ; dttyp(10)=8 763 end if 764 !xccc1d (in driver) 765 cadd(11)=n1xccc*6*ntypat ; dttyp(11)=8 766 767 !hessin in brdmin 768 if(ionmov==2)then 769 cadd(15)=3*natom*3*natom ; dttyp(15)=8 770 end if 771 772 !Additional PAW arrays 773 !PAW datasets (pawtab) 774 if (usepaw==1) then 775 dttyp(16)=8 ; dttyp(17)=4 776 do ii=1,npsp 777 cadd(16)=cadd(16)+2*mesh_size(ii)*basis_size(ii) !phi,tphi 778 cadd(16)=cadd(16)+2*mesh_size(ii)*basis_size(ii)& !phiphj,tphiphj 779 & *(basis_size(ii)+1)/2 780 cadd(16)=cadd(16)+mesh_size(ii)*l_size(ii) !shapefunc 781 cadd(16)=cadd(16)+lmn2_size(ii)*l_size(ii)**2 !qijl 782 cadd(16)=cadd(16)+l_size(ii)*5 !gnorm,shape_a,shape_q 783 cadd(16)=cadd(16)+lmn2_size(ii)*(4+lmn2_size(ii)) !eijkl,dltij,dij0,rhoij0,sij 784 cadd(17)=cadd(17)+lmn2_size(ii)*8 !indklmn 785 cadd(16)=cadd(16)+mesh_size(ii)*5 !coreden,tcoreden,rad,radfact,simfact 786 if (shape_type(ii)==-1) cadd(16)=cadd(16)+4*mesh_size(ii)*l_size(ii) !dshpfunc 787 cadd(16)=cadd(16)+mqgrid_vl*2 !tncorespl 788 if (pawver(ii)>=4) cadd(16)=cadd(16)+mqgrid_vl*2 !tnvalespl 789 end do 790 ! additional arrays 791 cadd(16)=cadd(16)+l_size_max*2*l_max*nsym !zarot 792 cadd(16)=cadd(16)+(2*l_max-1)**2*l_max**2*(l_max**2+1)/2 !realgnt 793 cadd(17)=cadd(17)+nfft+nfftf ! fintocoa,coatofin 794 do ii=1,ntypat 795 cadd(16)=cadd(16)+my_nattyp(ii)*lmn2_size(ii)*rhoij_nspden*pawcpxocc ! Rhoij and related data 796 cadd(17)=cadd(17)+my_nattyp(ii)*(2+lmn2_size(ii)) ! (rhoijselect, ...) 797 end do 798 !PAW:cprj 799 do ii=1,ntypat 800 cadd(16)=cadd(16)+2*nattyp(ii)*nkpt*nspinor*mband*nsppol*lmn_size(ii)/max(mpi_enreg%nproc_band,1) 801 end do 802 end if 803 804 !SCF history (if selected) 805 if (abs(densfor_pred)==5.or.abs(densfor_pred)==6) then ! scf_history... 806 histsz=2 807 cfftf(18)=nspden*(histsz+1)+1 ; dttyp(18)=8 ! %deltarhor, %atmrho_last, %rhor_last 808 cadd(19)=3*natom*2*histsz ; dttyp(19)=8 ! %xreddiff,xred_last 809 dttyp(20)=4 810 if (usepaw==1) then 811 do ii=1,ntypat 812 cadd(19)=cadd(19)+histsz*2*my_nattyp(ii)*lmn2_size(ii)*rhoij_nspden*qphase_rhoij*pawcpxocc ! %pawrhoij()%rhoijp 813 cadd(20)=cadd(20)+histsz*2*my_nattyp(ii)*(2+lmn2_size(ii))*nspden ! %pawrhoij()%rhoijselect 814 end do 815 end if 816 if (extrapwf>0) then 817 cadd(19)=cadd(19)+histsz*2*nspinor*mband*mkmem*nsppol ; dttyp(19)=8 ! %cg 818 end if 819 end if 820 821 !(2) in scfcv---------------------------------------- 822 823 !vhartr,vpsp,vtrial,vxc 824 cfftf(21)=2+2*nspden ; dttyp(21)=8 825 !kxc 826 if (abs(densfor_pred)>0.and.iscf>=10) then 827 cfftf(21)=cfftf(21)+3*nspden 828 if (densfor_pred<0.and.xclevel==2) cfftf(21)=cfftf(21)+20*nspden 829 end if 830 if(iscf>0)then 831 ! f_fftgr 832 if (pawmixdg==1) then 833 cfftf(22)=nspden*n_fftgr*mffmem; dttyp(22)=8 834 else 835 cfft(22)=nspden*n_fftgr*mffmem; dttyp(22)=8 836 end if 837 end if 838 if( iscf>0 .and. (modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70))then 839 ! dielinv, susmat 840 cadd(23)=4*(npwdiel*min(nspden,2))**2; dttyp(23)=8 841 end if 842 !Kernel of Poisson's solver 843 if (icoulomb == 1) then 844 cadd(24) = ngfft(4)*ngfft(5)*ngfft(6) ; dttyp(24) = 8 845 end if 846 if( (iscf>0 .and. modulo(iprcel,100)>=20 .and. modulo(iprcel,100)<70) .or. iscf==-1 )then 847 ! kg_diel 848 cadd(27)=3*npwdiel ; dttyp(27)=4 849 if(nsym/=1)then 850 ! irrzondiel 851 cadd(27)=cadd(27)+2*nfftdiel*(nspden/nsppol) 852 ! phnonsdiel 853 cadd(28)=2*nfftdiel*(nspden/nsppol) ; dttyp(28)=8 854 end if 855 end if 856 if(n1xccc/=0)then 857 ! xccc3d 858 cfftf(29)=1 ; dttyp(29)=8 859 end if 860 861 !Additional PAW arrays 862 dttyp(25)=8 ; dttyp(26)=4 863 if (usepaw==1) then 864 do ii=1,ntypat 865 jj=(1+int(nfftf*four_pi*rshp(ii)**3/(three*ucvol))) ! pawfgrtab 866 cadd(26)=cadd(26)+my_nattyp(ii)*jj ! %ifftsph 867 cadd(25)=cadd(25)+my_nattyp(ii)*jj*(1-pawstgylm)*3 ! %rfgd (if pawstgylm=0) 868 cadd(25)=cadd(25)+my_nattyp(ii)*jj*pawstgylm*l_size(ii)**2 ! %gylm (if pawstgylm=1) 869 if (optforces==1) cadd(25)=cadd(25)+my_nattyp(ii)*jj& ! %gylmgr,%rfgd (if pawstgylm=1) 870 & *pawstgylm*(3*l_size(ii)**2+3*optstress) 871 cadd(26)=cadd(26)+my_nattyp(ii)*l_size(ii)**2/32 ! lmselect !now a boolean 872 cadd(25)=cadd(25)+my_nattyp(ii)*lmn2_size(ii)*nspinor**3 ! dij 873 if (iscf>0) then 874 cadd(25)=cadd(25)+my_nattyp(ii)*lmn2_size(ii)*rhoij_nspden*pawcpxocc ! rhoijres 875 cadd(25)=cadd(25)+my_nattyp(ii)*lmn2_size(ii)*rhoij_nspden*pawcpxocc*n_fftgr*mffmem ! f_paw 876 end if 877 end do 878 ! cadd(25)=cadd(25)+(1+3*pawnhatxc*(ngrad/2))*nspden*nfftf !nhat,nhatgr 879 cfftf(29)=cfftf(29)+(1+3*pawnhatxc*(ngrad/2))*nspden !nhat,nhatgr 880 end if 881 882 !(3) in rhotoxc, xcden ------------------------------- 883 884 if(xclevel/=0)then 885 if(n1xccc/=0)then 886 ! rhocorval 887 cfftf(31)=nspden ; dttyp(31)=8 888 end if 889 ! dnexcdn, rhonow 890 nspgrad=nspden*ngrad 891 if(nspden==2 .and. ngrad==2)nspgrad=5 892 cfftf(32)=nspden*ngrad*ngrad+nspgrad ; dttyp(32)=8 893 if(intxc==1 .or. ngrad==2)then 894 ! wkcmpx,work in xcden +work1,work2 in fourdp 895 cfftf(33)=3+narr_fourdp ; dttyp(33)=8 896 cadd(33)=narr_fourdp*(ngfftf(4)*ngfftf(5)*ngfftf(6)-nfftf) 897 end if 898 if(ngrad==2)then 899 ! workgr in xcden 900 cfftf(34)=2 ; dttyp(34)=8 901 end if 902 end if 903 if(iscf>0)then 904 ! In this case, rhotoxc is called from rhotov also, 905 ! for which vresid was allocated in scfcv 906 ! vresid 907 cfftf(35)=nspden ; dttyp(35)=8 908 end if 909 !Poisson's solver with zero padding 910 if (icoulomb == 1) then 911 cfft(36) = 8 ; dttyp(36) = 8 912 cadd(36) = ngfft(4) * ngfft(5) * ngfft(6) - nfft 913 end if 914 915 !Note : in hartre, called by rhotoxc, one uses 916 !2 dp arrays of total size 3*nfft, 917 !and 2 arrays of total size 4*n4*n5*n6 for fourdp 918 !This will be smaller than the total use for symrhg 919 920 !(4) in newvtr/newrho -------------------------------------- 921 922 if(iscf>0)then 923 ! vresid (allocated in scfcv) and vrespc 924 if (pawmixdg==1) then 925 cfftf(41)=2*nspden ; dttyp(41)=8 926 else 927 cfft(41)=2*nspden ; dttyp(41)=8 928 end if 929 if(mffmem==0)then 930 ! f_fftgr_disk 931 if (pawmixdg==1) then 932 cfftf(42)=nspden*n_fftgr ; dttyp(42)=8 933 else 934 cfft(42)=nspden*n_fftgr ; dttyp(42)=8 935 end if 936 ! f_paw_disk 937 if (usepaw==1) then 938 dttyp(43)=8 939 do ii=1,ntypat 940 cadd(43)=cadd(43)+my_nattyp(ii)*lmn2_size(ii)*nspden*n_fftgr 941 end do 942 end if 943 end if 944 ! rhoupdn, n(v)resid0, vtrialg, rhog2, magng 945 if (pawmixdg==1) then 946 cfftf(43)=2*nspden ; dttyp(43)=8 947 else 948 cfft(43)=2*nspden ; dttyp(43)=8 949 if (nspden>1) cfftf(43)=2*(nspden-1) 950 end if 951 end if 952 953 !(5-6) in vtorho----------------------------------------- 954 955 !Note : (5) is for the arrays inside the spin and k-point loop 956 !they belong to the main chain 957 !(6) is for the arrays after the spin and k-point loop 958 !(6a) is for the arrays after that loop, for the parallel k-point chain 959 !(6b) is for the arrays in mkrho, for the mkrho chain 960 !(6c) is for the arrays in symrhg, for the fourdp chain 961 !(6d) is for the arrays in suscep, for the suscep chain, see (10) 962 !(6e) is for the arrays in dielmt, for the dielmt chain, see (11) 963 !(6f) is for the arrays in pawmkrhoij 964 965 !eknlk, enlxnk, grnlnk 966 cadd(51)=(11+3*natom)*mband*nkpt*nsppol & 967 & ; dttyp(51)=8 968 !kg_k 969 cmpw(52)=3 ; dttyp(52)=4 970 !rhoaug,vlocal 971 cfft(53)=2 ; dttyp(53)=8 972 cadd(53)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft) 973 !rhowfr,rhowfg 974 cfft(53)=cfft(53)+2+nspden 975 if(mkmem==0)then 976 ! cg_disk 977 cmpw(54)=2*nspinor*mband ; dttyp(54)=8 978 end if 979 !eig_k, ek_k, enlx_k, grnl_k, occ_k, resid_k 980 cadd(56)=(14+3*natom)*mband ; dttyp(56)=8 981 !ylm_k 982 cmpw(57)=mpsang*mpsang*useylm ; dttyp(57)=8 983 !!PAW:cprj 984 ! if (usepaw==1) then 985 ! dttyp(58)=8 986 ! do ii=1,ntypat 987 ! cadd(58)=cadd(58)+2*nattyp(ii)*nkpt*nspinor*mband*nsppol*lmn_size(ii)/max(mpi_enreg%nproc_band,1) 988 ! end do 989 ! end if 990 991 !(6) in vtorho---------------------------------------- 992 993 !doccde 994 cadd(60)=mband*nkpt*nsppol ; dttyp(60)=8 995 996 !(6a) in vtorho---------------------------------------- 997 if(xmpi_paral==1)then 998 ! Parallel case 999 ! buffer1 1000 ! buffer2 1001 if(occopt>=3 .and. occopt <=8) then 1002 dttyp(61)=8 1003 if(nsppol*nfft >= (13+3*natom)*mband*nkpt*nspden)then 1004 cfft(61)=2*nspden 1005 else 1006 cadd(61)=(13+3*natom)*mband*nkpt*nspden 1007 end if 1008 else 1009 cfft(61)=2*nspden ; dttyp(61)=8 1010 cadd(61)=9+3*natom+2+2*mband*nkpt*nspden 1011 end if 1012 end if 1013 1014 1015 !(6b) in mkrho, called by vtorho-------------------------- 1016 if(occopt>=3 .and. occopt <=8)then 1017 if(mkmem==0)then 1018 ! cg_disk 1019 cmpw(62)=2*nspinor*mband ; dttyp(62)=8 1020 end if 1021 ! cwavef 1022 cmpw(65)=2*nspinor ; dttyp(65)=8 1023 1024 ! rhoaug, wfraug, work1 in fourwf 1025 cfft(66)=5 ; dttyp(66)=8 1026 cadd(66)=5*(ngfft(4)*ngfft(5)*ngfft(6)-nfft) 1027 end if 1028 1029 !(6c) in symrhg, called by vtorho-------------------------- 1030 if(iscf>0)then 1031 cfft(67)=narr_fourdp ; dttyp(67)=8 1032 cadd(67)=narr_fourdp*(ngfft(4)*ngfft(5)*ngfft(6)-nfft) 1033 if(nsym>1)then 1034 ! work1 in symrhg 1035 cfft(68)=2 ; dttyp(68)=8 1036 cadd(68)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft) 1037 end if 1038 end if 1039 1040 1041 !(6d) and (6e) in suscep and dielmt, called by vtorho, 1042 !see (10) and (11) ------------------------------- 1043 1044 !(6f) in pawmkrhoij or pawrhoij_symrhoij called by pawmkrho, called by vtorho-------- 1045 !only when paralellim over atoms is activated 1046 dttyp(63)=8 1047 if((usepaw==1) .and. ((iscf>0) .or. (iscf == -3) .and. mpi_enreg%nproc_atom>1 ))then 1048 do ii=1,ntypat 1049 cadd(63)=cadd(63)+nattyp(ii)*lmn2_size(ii)*rhoij_nspden*pawcpxocc*qphase_rhoij ! Rhoij_gather and related data 1050 cadd(63)=cadd(63)+nattyp(ii)*(2+lmn2_size(ii)) ! Rhoij_gather (rhoijselect, ...) 1051 end do 1052 end if 1053 1054 !(7) in vtowfk---------------------------------------- 1055 1056 !evec 1057 cadd(71)=2*mband*mband ; dttyp(71)=8 1058 !subham, subvnlx(if not PAW or if usefock_ACE) 1059 cadd(72)=(1+usepaw)*mband*(mband+1) ; dttyp(72)=8 1060 !gkpsq 1061 cmpw(73)=1 ; dttyp(73)=8 1062 !ffnl 1063 cmpw(74)=2*ntypat*lmnmax ; dttyp(74)=8 1064 !ph3d 1065 matblk=min(NLO_MINCAT,maxval(nattyp)) 1066 if(nloalg(2)<=0)matblk=natom 1067 cmpw(75)=2*matblk ; dttyp(75)=8 1068 !gsc(if PAW) 1069 ! cmpw(76)=2*mband*nspinor*usepaw ; dttyp(76)=8 1070 !Note : matvnl and mat1 do not belong to a chain defined until now 1071 ! 1072 if(occopt<3 .and. iscf>0)then 1073 ! cwavef 1074 cmpw(77)=2*nspinor ; dttyp(77)=8 1075 ! wfraug 1076 cfft(78)=2 ; dttyp(78)=8 1077 cadd(78)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft) 1078 ! work1 in fourwf 1079 cfft(79)=2 ; dttyp(79)=8 1080 cadd(79)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft) 1081 end if 1082 1083 1084 !(8) in cgwf_cprj------------------------------------- 1085 1086 !conjgr, direc, direc_tmp, gvnlx 1087 cmpw(81)=2*4*nspinor ; dttyp(81)=8 1088 ! cwavef_r,direc_r 1089 cfft(82)=2*2*nspinor ; dttyp(82)=8 1090 1091 !(8) in cgwf------------------------------------------ 1092 1093 !!conjgr, cwavef, direc, gh_direc, gvnlx_direc 1094 ! cmpw(81)=2*5*nspinor ; dttyp(81)=8 1095 !!ghc,gvnlxc 1096 ! cmpw(82)=2*2*nspinor ; dttyp(82)=8 1097 !!PAW: scwavef,direc_tmp,ghc_all 1098 ! cmpw(83)=2*(2+mband)*nspinor*usepaw ; dttyp(83)=8 1099 1100 1101 !(9a) in getghc and fourwf---------------------------- 1102 1103 !work (in getghc) 1104 cfft(91)=2 ; dttyp(91)=8 1105 cadd(92)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft) 1106 !work1 (in fourwf) 1107 cfft(92)=2 ; dttyp(92)=8 1108 cadd(92)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft) 1109 1110 !(9b) in getghc, nonlop and opernl-------------------- 1111 mincat=min(NLO_MINCAT,natom-ntypat+1) 1112 if (useylm==0) then ! ===== nonlop_pl 1113 ! gxa (in nonlop) 1114 cadd(94)=2*20*mincat*2 ; dttyp(94)=8 1115 ! dgxdt (in nonlop) !MT20072002: not allocated in getghc !! 1116 if (optforces==1) then 1117 cadd(95)=2*3*20*mincat*2 ; dttyp(95)=8 1118 end if 1119 ! teffv (in opernl4 - no distinction is made for opernl, opernl2 or opernl3) 1120 ! kpgx, ffkg 1121 ! here, evaluate an upper value, with nproj=2, p,d and f orbitals, but not 1122 ! considering the stress, since it will be called outside of the main chain 1123 cadd(97)=NLO_MBLKPW*40 ; dttyp(97)=8 1124 ! kpg if nloalg(3)=1 1125 cadd(98)=3*mpw*nloalg(3) ; dttyp(98)=8 1126 else ! ===== nonlop_ylm 1127 ! gx + gxfac + gxfac_sij 1128 ! cadd(94)=2*lmnmax*mincat*(mpw+1+usepaw) ; dttyp(94)=8 1129 cmpw(94)=2*lmnmax*mincat ; dttyp(94)=8 1130 cadd(99)=2*lmnmax*mincat*(1+usepaw) ; dttyp(99)=8 1131 ! kpg 1132 cmpw(95)=3 ; dttyp(95)=8 1133 ! indlmn_typ, ffnl_typ 1134 cadd(96)=lmnmax*6; dttyp(96)=4 1135 ! ffnl_typ 1136 cmpw(97)=lmnmax; dttyp(97)=8 1137 ! opernla_ylm: scalar,scali 1138 cmpw(98)=2; dttyp(98)=8 1139 end if 1140 1141 !(10) in suscep and suskmm ---------------------------- 1142 1143 if(modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70)then 1144 ! Variables allocated in suscep 1145 if(mkmem==0)then 1146 ! cg_disk 1147 cmpw(101)=2*mband ; dttyp(101)=8 1148 end if 1149 if(occopt>=3)then 1150 ! drhode 1151 cadd(103)=2*npwdiel*nsppol ; dttyp(103)=8 1152 end if 1153 ! rhoextrap (always included, although it appears only when extrap==1) 1154 cadd(104)=ndiel456 ; dttyp(104)=8 1155 1156 ! Variables allocated in suskmm 1157 ! cwavef 1158 cmpw(106)=2 ; dttyp(106)=8 1159 ! rhoaug, wfraug 1160 cadd(107)=3*ndiel456 ; dttyp(107)=8 1161 ! wfprod 1162 cadd(108)=2*npwdiel ; dttyp(108)=8 1163 ! wfrspa1, wfrspa2 1164 cadd(109)=4*ndiel456*nbnd_in_blk ; dttyp(109)=8 1165 1166 end if 1167 1168 !(11) in dielmt --------------------------------------- 1169 1170 if(modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70)then 1171 ! dielh,dielvec,eig_diel,zhpev1,zhpev2 1172 cadd(111)=3*npwdiel*npwdiel & 1173 & +9*npwdiel ; dttyp(111)=8 1174 end if 1175 1176 !(12) in tddft --------------------------------------- 1177 1178 if(iscf==-1)then 1179 if(mkmem/=0)then 1180 ! cg_disk 1181 cmpw(121)=2*mband ; dttyp(121)=8 1182 end if 1183 ! cwavef 1184 cmpw(124)=2*mband ; dttyp(124)=8 1185 ! rhoaug,wfraug,wfrspa 1186 cadd(125)=(2+mband)*ndiel456 ; dttyp(125)=8 1187 end if 1188 1189 !-------------------------------------------------------------------------- 1190 1191 chain(:,:)=.true. 1192 1193 !Define the main chain version a (fourwf) 1194 chain(31:50,1)=.false. 1195 chain(60:70,1)=.false. 1196 chain(77:80,1)=.false. 1197 chain(93:100,1)=.false. 1198 chain(101:marrays,1)=.false. 1199 1200 !Define the main chain version b (nonlop+opernl) 1201 chain(31:50,2)=.false. 1202 chain(60:70,2)=.false. 1203 chain(77:80,2)=.false. 1204 chain(91:92,2)=.false. 1205 chain(101:marrays,2)=.false. 1206 1207 !Define the XC chain ( 31:40 belong only to this chain) 1208 chain(41:marrays,3)=.false. 1209 1210 !Define the mkrho chain ( 62:66 and 76:77 belong only to this chain) 1211 !is it sure that they have to be summed ?) 1212 chain(31:50,4)=.false. 1213 chain(51:59,4)=.false. 1214 chain(61 ,4)=.false. 1215 chain(67:70,4)=.false. 1216 chain(71:marrays,4)=.false. 1217 chain(77:80,4)=.true. 1218 1219 !Define the fourdp chain ( 67:70 belong only to this chain) 1220 chain(31:50,5)=.false. 1221 chain(51:66,5)=.false. 1222 chain(60 ,5)=.true. 1223 chain(71:marrays,5)=.false. 1224 1225 !Define the parallel k-point chain ( 61 belong only to this chain ) 1226 chain(31:50,6)=.false. 1227 chain(51:59,6)=.false. 1228 chain(62:70,6)=.false. 1229 chain(71:marrays,6)=.false. 1230 1231 !Define the newvtr chain ( 41:50 belong only to this chain) 1232 chain(31:40,7)=.false. 1233 chain(51:marrays,7)=.false. 1234 1235 !Define the suscep chain ( 101:110 belong only to this chain) 1236 chain(31:marrays,8)=.false. 1237 chain(60 ,8)=.true. 1238 chain(101:110,8)=.true. 1239 1240 !Define the dielmt chain ( 111:120 belong only to this chain) 1241 chain(31:marrays,9)=.false. 1242 chain(60 ,9)=.true. 1243 chain(111:120,9)=.true. 1244 1245 !Define the tddft chain ( 121:130 belong only to this chain) 1246 chain(31:marrays,10)=.false. 1247 chain(60 ,10)=.true. 1248 chain(121:130,10)=.true. 1249 1250 !The memory needed for each chain has been computed 1251 !------------------------------------------------------------------------- 1252 !Still need some auxiliary data : estimate the disk space 1253 !or the maximum segment size. 1254 1255 !XG030513 : MPIWF need to multiply mbdiskwf by the number of processors 1256 !in the WF group. For the time being, nprocwf=1 1257 nprocwf=mpi_enreg%nproc_fft 1258 1259 mbdiskwf=(8*two*mpw*nprocwf*sum(nband(1:nkpt*nsppol)))/1024._dp**2 + 0.002_dp 1260 mbdiskpd=(8*nfftf*nsppol)/1024._dp**2 + 0.002_dp 1261 1262 !Determine the largest array out of cg (cg_disk), f_fftgr (f_fftgr_disk), or pawfgrtab%gylm 1263 if(mkmem==0)then 1264 mbcg=(8*2*mpw*mband)/1024._dp**2 + 0.002_dp 1265 else 1266 mbcg=(8*2*mpw*mband*mkmem*nsppol)/1024._dp**2 + 0.002_dp 1267 end if 1268 if(mffmem==0)then 1269 if (pawmixdg==1) then 1270 mbf_fftgr=(8*nfftf*n_fftgr)/1024._dp**2 + 0.002_dp 1271 else 1272 mbf_fftgr=(8*nfft*n_fftgr)/1024._dp**2 + 0.002_dp 1273 end if 1274 else 1275 if (pawmixdg==1) then 1276 mbf_fftgr=(8*nfftf*n_fftgr*nsppol*mffmem)/1024._dp**2 + 0.002_dp 1277 else 1278 mbf_fftgr=(8*nfft*n_fftgr*nsppol*mffmem)/1024._dp**2 + 0.002_dp 1279 end if 1280 end if 1281 if(usepaw==1)then 1282 mbgylm=0 1283 do ii=1,ntypat ! pawfgrtab 1284 jj=(1+int(nfftf*four_pi/(three*ucvol)*rshp(ii)**3)) 1285 mbgylm=mbgylm+my_nattyp(ii)*jj & 1286 & *( l_size(ii)**2*pawstgylm & ! %gylm (if pawstgylm=1) 1287 & +3*max((optforces+1)/2,optstress)*l_size(ii)**2*pawstgylm& ! %gylmgr (if pawstgylm=1) 1288 & +3*optstress*pawstgylm& ! %rfgd (if pawstgylm=1) 1289 & +3*(1-pawstgylm) ) ! %rfgd (if pawstgylm=0) 1290 end do 1291 mbgylm=8*mbgylm/1024._dp**2 + 0.002_dp 1292 else 1293 mbgylm=0 1294 end if 1295 1296 !------------------------------------------------------------------------- 1297 ABI_FREE(my_nattyp) 1298 ABI_FREE(basis_size) 1299 ABI_FREE(l_size) 1300 ABI_FREE(lmn_size) 1301 ABI_FREE(lmn2_size) 1302 ABI_FREE(mesh_size) 1303 ABI_FREE(pawver) 1304 ABI_FREE(shape_type) 1305 ABI_FREE(rshp) 1306 1307 !--------------------------------------------------------------------- 1308 !Now, analyze the data 1309 1310 call memana(cadd,cfft,cfftf,chain,cmpw,dttyp,iout,iprcel,iscf,& 1311 & marrays,mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm,mffmem,& 1312 & mpw,natom,nchain,nfft,nfftf,occopt,option,prtvol) 1313 1314 end subroutine memory
m_memeval/memory_eval [ Functions ]
[ Top ] [ m_memeval ] [ Functions ]
NAME
memory_eval
FUNCTION
Big loop on the datasets: - for each of the datasets, write one line about the crystallographic data - compute the memory needs for this data set.
INPUTS
dtsets(0:ndtset_alloc)=<type datafiles_type>contains all input variables iout=unit number of output file mpi_enregs=information about MPI parallelization ndtset= number of datasets to be read; if 0, no multi-dataset mode ndtset_alloc=number of datasets, corrected for allocation of at least one data set. npsp=number of pseudopotentials pspheads(npsp)=<type pspheader_type>all the important information from the pseudopotential file header, as well as the psp file name
OUTPUT
printing only
SOURCE
75 subroutine memory_eval(dtsets,iout,mpi_enregs,ndtset,ndtset_alloc,npsp,pspheads) 76 77 !Arguments ------------------------------------ 78 !scalars 79 integer,intent(in) :: iout,ndtset,ndtset_alloc,npsp 80 type(MPI_type),intent(inout) :: mpi_enregs(0:ndtset_alloc) 81 !arrays 82 type(dataset_type),intent(inout) :: dtsets(0:ndtset_alloc) 83 type(pspheader_type),intent(in) :: pspheads(npsp) 84 85 !Local variables ------------------------------- 86 !scalars 87 integer :: cplex,exchn2n3d,extrapwf,getcell,idtset,ii,intxc,densfor_pred,iprcel 88 integer :: iscf,isym,jdtset,lmnmax,mem_test 89 integer :: lmnmax_eff,lmnmaxso,lnmax,lnmax_eff,lnmaxso,mband 90 integer :: me_fft,mffmem,mgfftdiel,mgfftf,mkmem,mpsang,mpspso 91 integer :: mpssoang,mpw,mqgrid,mqgriddg,mqgrid_ff,mqgrid_vl,n1xccc,natom 92 integer :: nfftdiel,nfftf,nkpt,nproc_fft,nptsgvec,npulayit,npwdiel,nspden,nspinor 93 integer :: nsppol,nsym,ntypat,occopt,optddk,optforces,optphon,optstress 94 integer :: optstrs,paral_fft,pawcpxocc,pawmixdg,pawnhatxc,pawspnorb,pawstgylm,prtvol,ptgroupma,response 95 integer :: spgroup,timrev,usepaw,useylm,gpu_option,xclevel 96 real(dp) :: diecut,dilatmx,ecut,ecut_eff,ecutdg_eff,ecutsus,ucvol 97 !arrays 98 integer :: bravais(11),mkmems(3),ngfftdiel(18) 99 integer :: ngfftf(18),nloalg(3) 100 integer,allocatable :: nband(:),symq(:,:,:),symrec(:,:,:),symrel(:,:,:) 101 real(dp),parameter :: k0(3)=(/zero,zero,zero/) 102 real(dp) :: genafm(3),gmet(3,3),gprimd(3,3),kpt_diel(3),qphon(3),rmet(3,3),rprimd(3,3) 103 104 !************************************************************************* 105 106 do idtset=1,ndtset_alloc 107 if(mpi_enregs(idtset)%me<0) cycle 108 call abi_io_redirect(new_io_comm=mpi_enregs(idtset)%comm_world) 109 call libpaw_write_comm_set(mpi_enregs(idtset)%comm_world) 110 111 ! Initialisations 112 bravais(:)=dtsets(idtset)%bravais(:) 113 exchn2n3d=dtsets(idtset)%exchn2n3d 114 extrapwf=dtsets(idtset)%extrapwf 115 genafm(:) =dtsets(idtset)%genafm(:) 116 getcell=dtsets(idtset)%getcell 117 intxc=dtsets(idtset)%intxc 118 densfor_pred=dtsets(idtset)%densfor_pred 119 iprcel=dtsets(idtset)%iprcel 120 iscf=dtsets(idtset)%iscf 121 jdtset=dtsets(idtset)%jdtset ; if(ndtset==0)jdtset=0 122 me_fft=mpi_enregs(idtset)%me_fft 123 mffmem=dtsets(idtset)%mffmem 124 mpw=dtsets(idtset)%mpw 125 mqgrid=dtsets(idtset)%mqgrid 126 mqgriddg=dtsets(idtset)%mqgriddg 127 natom=dtsets(idtset)%natom 128 nkpt =dtsets(idtset)%nkpt 129 nloalg(:)=dtsets(idtset)%nloalg(:) 130 nproc_fft=mpi_enregs(idtset)%nproc_fft 131 npulayit=dtsets(idtset)%npulayit 132 nspden=dtsets(idtset)%nspden 133 nspinor=dtsets(idtset)%nspinor 134 nsppol=dtsets(idtset)%nsppol 135 nsym =dtsets(idtset)%nsym 136 ntypat=dtsets(idtset)%ntypat 137 occopt=dtsets(idtset)%occopt 138 optforces=dtsets(idtset)%optforces 139 paral_fft=mpi_enregs(idtset)%paral_kgb 140 pawcpxocc=dtsets(idtset)%pawcpxocc 141 pawmixdg=dtsets(idtset)%pawmixdg 142 pawnhatxc=dtsets(idtset)%pawnhatxc 143 pawspnorb=dtsets(idtset)%pawspnorb 144 pawstgylm=dtsets(idtset)%pawstgylm 145 prtvol=dtsets(idtset)%prtvol 146 ptgroupma =dtsets(idtset)%ptgroupma 147 qphon(:)=dtsets(idtset)%qptn(:) 148 spgroup =dtsets(idtset)%spgroup 149 usepaw=dtsets(idtset)%usepaw 150 useylm=dtsets(idtset)%useylm 151 gpu_option=dtsets(idtset)%gpu_option 152 xclevel=dtsets(idtset)%xclevel 153 154 ABI_MALLOC(symrel,(3,3,nsym)) 155 symrel(:,:,1:nsym)=dtsets(idtset)%symrel(:,:,1:nsym) 156 157 ! Space group output 158 call prtspgroup(bravais,genafm,std_out,jdtset,ptgroupma,spgroup) 159 call prtspgroup(bravais,genafm,iout,jdtset,ptgroupma,spgroup) 160 161 if (dtsets(idtset)%toldff>tol16.and.optforces==0) optforces=1 162 if (dtsets(idtset)%tolrff>tol16.and.optforces==0) optforces=1 163 if (dtsets(idtset)%ionmov>tol16.and.optforces==0) optforces=1 164 if (dtsets(idtset)%imgmov>tol16.and.optforces==0) optforces=1 165 optstress=dtsets(idtset)%optstress 166 optddk=0;optphon=0;optstrs=0 167 if (dtsets(idtset)%rfddk>0.or.dtsets(idtset)%rf2_dkdk>0.or.dtsets(idtset)%rf2_dkde>0) optddk=1 168 if (dtsets(idtset)%rfelfd>0.or.dtsets(idtset)%d3e_pert1_elfd>0.or.& 169 & dtsets(idtset)%d3e_pert2_elfd>0.or.dtsets(idtset)%d3e_pert3_elfd>0) optddk=1 170 if (dtsets(idtset)%rfphon>0.or.dtsets(idtset)%d3e_pert1_phon>0.or.& 171 & dtsets(idtset)%d3e_pert2_phon>0.or.dtsets(idtset)%d3e_pert3_phon>0) optphon=1 172 if (dtsets(idtset)%rfstrs>0) optstrs=1 173 174 ABI_MALLOC(nband,(nkpt*nsppol)) 175 nband(1:nkpt*nsppol)=dtsets(idtset)%nband(1:nkpt*nsppol) 176 mband=maxval(nband(1:nkpt*nsppol)) 177 dtsets(idtset)%mband=mband 178 179 ! mpsang=max(maxval(pspheads(1:npsp)%lmax)+1,1) ! Likely problems with the HP compiler 180 ! n1xccc=maxval(pspheads(1:npsp)%xccc) 181 mpsang=1 182 n1xccc=pspheads(1)%xccc 183 do ii=1,npsp 184 mpsang=max(pspheads(ii)%lmax+1,mpsang) 185 n1xccc=max(pspheads(ii)%xccc,n1xccc) 186 end do 187 188 ! Determine the maximum number of projectors, for the set of pseudo atom 189 call getdim_nloc(lmnmax,lmnmaxso,lnmax,lnmaxso,dtsets(idtset)%mixalch_orig,& 190 & dtsets(idtset)%nimage,npsp,dtsets(idtset)%npspalch,ntypat,dtsets(idtset)%ntypalch,pspheads) 191 192 ! Treatment of the effect of using a spin-orbit part 193 ! Warning: mpspso is different for each dataset; not relevant for PAW 194 mpspso=1 195 if (dtsets(idtset)%usepaw==0) then 196 do ii=1,npsp 197 if(nspinor/=1)then 198 if(pspheads(ii)%pspso/=0)then 199 if(dtsets(idtset)%so_psp(ii)/=0)then 200 mpspso=2 201 end if 202 end if 203 end if 204 end do 205 end if 206 ! In case of no spin-orbit 207 if(mpspso==1)then 208 mpssoang=mpsang ; lmnmax_eff =lmnmax; lnmax_eff =lnmax 209 else ! spin-orbit will be used 210 mpssoang=2*mpsang-1 ; lmnmax_eff =lmnmaxso ; lnmax_eff =lnmaxso 211 end if 212 ! lmnmax is not used if the Ylm are not used 213 if (useylm==0) lmnmax_eff =lnmax_eff 214 215 ecut =dtsets(idtset)%ecut 216 dilatmx =dtsets(idtset)%dilatmx 217 ecut_eff=ecut*dilatmx**2 218 ecutdg_eff=dtsets(idtset)%pawecutdg*dtsets(idtset)%dilatmx**2 219 220 ! Compute mgfft,mpw,nfft for this data set 221 call mkrdim(dtsets(idtset)%acell_orig(1:3,1),dtsets(idtset)%rprim_orig(1:3,1:3,1),rprimd) 222 call metric(gmet,gprimd,-1,rmet,rprimd,ucvol) 223 224 if (usepaw==0) then 225 mgfftf=dtsets(idtset)%mgfft;nfftf=dtsets(idtset)%nfft;ngfftf(:)=dtsets(idtset)%ngfft(:) 226 else 227 mgfftf=dtsets(idtset)%mgfftdg;nfftf=dtsets(idtset)%nfftdg;ngfftf(:)=dtsets(idtset)%ngfftdg(:) 228 end if 229 response=0 230 if(dtsets(idtset)%rfddk/=0 .or. dtsets(idtset)%rf2_dkdk/=0 .or. dtsets(idtset)%rf2_dkde/=0 .or. & 231 & dtsets(idtset)%rfphon/=0 .or. dtsets(idtset)%rfelfd/=0 .or. & 232 & dtsets(idtset)%rfstrs/=0 .or. dtsets(idtset)%rfuser/=0 .or. & 233 & dtsets(idtset)%rfmagn/=0 ) response=1 234 235 ! Compute mgfftdiel,npwdiel,nfftdiel for this data set 236 if((modulo(iprcel,100)>=20 .and.modulo(iprcel,100) < 71).or. iscf==-1)then 237 ! Get diecut, and the fft grid to be used for the susceptibility computation 238 diecut=abs(dtsets(idtset)%diecut) 239 if( dtsets(idtset)%diecut < zero )then 240 ecutsus=ecut 241 else 242 ecutsus= ( sqrt(ecut) *0.5_dp + sqrt(diecut) *0.25_dp )**2 243 end if 244 ! Beware, for the dielectric matrix fftalg=ngfftdiel(7) is default here 245 ngfftdiel(1:3)=0 ; ngfftdiel(7)=101 ; ngfftdiel(8:18)=dtsets(idtset)%ngfft(8:18) 246 if(iscf==-1)ngfftdiel(7)=102 247 ecut_eff=ecutsus*dilatmx**2 248 call getng(dtsets(idtset)%boxcutmin,dtsets(idtset)%chksymtnons,& 249 & ecut_eff,gmet,k0,me_fft,mgfftdiel,nfftdiel,& 250 & ngfftdiel,nproc_fft,nsym,paral_fft,symrel,dtsets(idtset)%tnons,& 251 & gpu_option=dtsets(idtset)%gpu_option) 252 ! Compute the size of the dielectric matrix : npwdiel 253 kpt_diel(1:3)=(/ 0.0_dp, 0.0_dp, 0.0_dp /) 254 ecut_eff=diecut*dilatmx**2 255 call getmpw(ecut_eff,exchn2n3d,gmet,(/1/),kpt_diel,mpi_enregs(idtset),npwdiel,1) 256 else 257 npwdiel=1 ; mgfftdiel=1 ; nfftdiel=1 ; ngfftdiel(1:8)=1 258 end if 259 260 ! Special treatment for the value of mqgrid to be fed in memory.F90 261 262 nptsgvec = 200 ! At present, this has to be chosen once and for all ... 263 if ( dtsets(idtset)%usewvl == 0) then 264 call setmqgrid(mqgrid,mqgriddg,ecut_eff,ecutdg_eff,gprimd,nptsgvec,usepaw) 265 else 266 call setmqgrid(mqgrid,mqgriddg,one,one,gprimd,nptsgvec,usepaw) 267 end if 268 mqgrid_ff=mqgrid 269 if (usepaw==0) mqgrid_vl=mqgrid 270 if (usepaw==1) mqgrid_vl=mqgriddg 271 272 ! Compute the memory needs for this data set. 273 if(response==0)then 274 275 if (dtsets(idtset)%usewvl == 0) then 276 mkmem=dtsets(idtset)%mkmem 277 mband=maxval(dtsets(idtset)%nband(1:nkpt*nsppol)) 278 279 ! Don't perform memory tests if MBPT. 280 mem_test = dtsets(idtset)%mem_test 281 if (any(dtsets(idtset)%optdriver == [RUNL_SIGMA, RUNL_SCREENING, RUNL_BSE, RUNL_EPH, RUNL_GWR])) mem_test = 0 282 283 call memory(n1xccc,extrapwf,getcell,idtset,dtsets(idtset)%icoulomb,& 284 & intxc,dtsets(idtset)%ionmov,iout,densfor_pred,& 285 & iprcel,iscf,jdtset,lmnmax_eff,lnmax_eff,mband,mffmem,dtsets(idtset)%mgfft,mgfftdiel,mgfftf,mkmem,& 286 & mpi_enregs(idtset),mpsang,mpssoang,mpw,mqgrid_ff,mqgrid_vl,natom,nband,dtsets(idtset)%nfft,nfftdiel,nfftf,& 287 & dtsets(idtset)%ngfft,ngfftdiel,ngfftf,dtsets(idtset)%nimage,nkpt,nloalg,npsp,npulayit,npwdiel,nspden,nspinor,& 288 & nsppol,nsym,ntypat,occopt,optforces,mem_test,optstress,pawcpxocc,pawmixdg,& 289 & pawnhatxc,pawspnorb,pawstgylm,prtvol,pspheads,qphon,dtsets(idtset)%tfkinfunc,& 290 & dtsets(idtset)%typat,ucvol,usepaw,useylm,gpu_option,xclevel) 291 else if( dtsets(idtset)%usepaw==0) then 292 if (mpi_enregs(idtset)%me == 0) then 293 call wvl_memory(dtsets(idtset), idtset, mpi_enregs(idtset), npsp, 1, pspheads) 294 end if 295 end if 296 297 else 298 ! Compute the value of cplex, for which one needs symrec 299 ABI_MALLOC(symq,(4,2,nsym)) 300 ABI_MALLOC(symrec,(3,3,nsym)) 301 do isym=1,nsym 302 call mati3inv(symrel(:,:,isym),symrec(:,:,isym)) 303 end do 304 call littlegroup_q(nsym,qphon,symq,symrec,dtsets(idtset)%symafm,timrev) 305 cplex=2-timrev 306 ABI_FREE(symq) 307 ABI_FREE(symrec) 308 mkmems(1)=dtsets(idtset)%mkmem 309 mkmems(2)=dtsets(idtset)%mkqmem 310 mkmems(3)=dtsets(idtset)%mk1mem 311 312 mem_test = dtsets(idtset)%mem_test 313 314 call memorf(cplex,n1xccc,getcell,idtset,intxc,iout,iprcel,& 315 & iscf,jdtset,lmnmax_eff,lnmax_eff,mband,mffmem,dtsets(idtset)%mgfft,& 316 & mkmems,mpi_enregs(idtset),mpsang,mpssoang,mpw,mqgrid_ff,natom,nband,dtsets(idtset)%nfft,& 317 & dtsets(idtset)%ngfft,nkpt,nloalg,nspden,nspinor,nsppol,nsym,& 318 & ntypat,occopt,optddk,optphon,mem_test,optstrs,prtvol,useylm,gpu_option,xclevel) 319 end if 320 321 ! Deallocate temporary arrays (when they will really be temporary !) 322 ABI_FREE(nband) 323 ABI_FREE(symrel) 324 325 end do ! idtset 326 327 end subroutine memory_eval
m_memeval/setmqgrid [ Functions ]
[ Top ] [ m_memeval ] [ Functions ]
NAME
setmqgrid
FUNCTION
Sets the number of points needed to represent the pseudopotentials in reciprocal space for a specified resolution.
INPUTS
ecut=cutoff energy for the wavefunctions ecutdg=cutoff energy for the fine grid in case usepaw==1 gprimd=primitive translation vectors for reciprocal space nptsgvec=number of points along the smallest primitive translation vector of the reciprocal space usepaw=1 if PAW is used, 0 otherwise
OUTPUT
SOURCE
2400 subroutine setmqgrid(mqgrid,mqgriddg,ecut,ecutdg,gprimd,nptsgvec,usepaw) 2401 2402 !Arguments ------------------------------------ 2403 integer , intent(inout) :: mqgrid,mqgriddg 2404 integer , intent(in) :: nptsgvec,usepaw 2405 real(dp), intent(in) :: ecut,ecutdg 2406 real(dp), intent(in) :: gprimd(3,3) 2407 2408 !Local variables------------------------------- 2409 integer :: mqgrid2,mqgriddg2 2410 real(dp) :: gmax,gmaxdg,gvecnorm 2411 character(len=500) :: msg 2412 2413 ! ************************************************************************* 2414 2415 gvecnorm=sqrt(min(dot_product(gprimd(:,1),gprimd(:,1)), & 2416 & dot_product(gprimd(:,2),gprimd(:,2)), & 2417 & dot_product(gprimd(:,3),gprimd(:,3)))) 2418 gmax=one/(sqrt2*pi)*sqrt(ecut) 2419 2420 if (mqgrid == 0) then 2421 mqgrid2=ceiling(gmax/gvecnorm*nptsgvec) 2422 mqgrid=max(mqgrid2,3001) 2423 write(msg, '(5a,i0,a)' )& 2424 & 'The number of points "mqgrid" in reciprocal space used for the',ch10,& 2425 & 'description of the pseudopotentials has been set automatically',ch10,& 2426 & 'by abinit to: ',mqgrid,'.' 2427 !ABI_COMMENT(msg) 2428 else 2429 mqgrid2=ceiling(gmax/gvecnorm*nptsgvec) 2430 if (mqgrid2>mqgrid) then 2431 write(msg, '(3a,i8,3a,i8,3a)' )& 2432 & 'The number of points "mqgrid" in reciprocal space used for the',ch10,& 2433 & 'description of the pseudopotentials is : ',mqgrid,'.',ch10,& 2434 & 'It would be better to increase it to at least ',mqgrid2,', or',ch10,& 2435 & 'let abinit choose it automatically by setting mqgrid = 0.' 2436 ABI_WARNING(msg) 2437 end if 2438 end if 2439 2440 if (usepaw==1) then 2441 if(ecutdg<tol6)then 2442 write(msg,'(a)')'The value of (paw)ecutdg is zero or negative, which is forbidden.' 2443 ABI_ERROR(msg) 2444 end if 2445 gmaxdg=one/(sqrt2*pi)*sqrt(ecutdg) 2446 if (mqgriddg == 0) then 2447 mqgriddg2=ceiling(gmaxdg/gvecnorm*nptsgvec) 2448 mqgriddg=max(mqgriddg2,3001) 2449 write(msg, '(5a,i0,a)' )& 2450 & 'The number of points "mqgriddg" in reciprocal space used for the',ch10,& 2451 & 'description of the pseudopotentials has been set automatically',ch10,& 2452 & 'by abinit to: ',mqgriddg,'.' 2453 !ABI_COMMENT(msg) 2454 else 2455 mqgriddg2=ceiling(gmax/gvecnorm*nptsgvec) 2456 if (mqgriddg2>mqgriddg) then 2457 write(msg, '(3a,i8,3a,i8,3a)' )& 2458 & 'The number of points "mqgriddg" in reciprocal space used for the',ch10,& 2459 & 'description of the pseudopotentials (fine grid) is :',mqgriddg,'.',ch10,& 2460 & 'It would be better to increase it to at least ',mqgriddg2,', or',ch10,& 2461 & 'let abinit choose it automatically by setting mqgrid = 0.' 2462 ABI_WARNING(msg) 2463 end if 2464 end if 2465 end if 2466 2467 end subroutine setmqgrid
m_memeval/wvl_memory [ Functions ]
[ Top ] [ m_memeval ] [ Functions ]
NAME
wvl_memory
FUNCTION
Estimation of the memory needed for waelet based computation job. According to the value of the option variable, might also try to allocate this amount of memory, and if it fails, might estimate the available memory.
INPUTS
dtset=<type datafiles_type>contains all input variables. idtset=number of the current dataset mpi_enreg=information about MPI parallelization npsp=number of pseudopotentials option: if 0, no test of available memory if 1, the routine tries to allocate the estimated memory, for testing purposes, and if a failure occurs, the routine stops. if 2, like 1, but before stopping, the routine will provide an estimation of the available memory. pspheads(npsp)=<type pspheader_type>all the important information from the pseudopotential file header, as well as the psp file name
OUTPUT
(only writing)
NOTES
The estimator is the one provided by BigDFT.
SOURCE
2501 subroutine wvl_memory(dtset, idtset, mpi_enreg, npsp, option, pspheads) 2502 2503 use defs_wvltypes 2504 use m_abi2big, only : wvl_setBoxGeometry 2505 use m_wvl_descr_psp, only : wvl_descr_free, wvl_descr_atoms_set 2506 2507 #if defined HAVE_BIGDFT 2508 use BigDFT_API, only: MemoryEstimator, createWavefunctionsDescriptors, deallocate_lr, & 2509 & atomic_info, memory_estimation 2510 #endif 2511 2512 !Arguments ------------------------------------ 2513 !scalars 2514 integer,intent(in) :: idtset, npsp, option 2515 type(dataset_type),intent(in) :: dtset 2516 type(MPI_type),intent(in) :: mpi_enreg 2517 !arrays 2518 type(pspheader_type),intent(in) :: pspheads(npsp) 2519 2520 !Local variables------------------------------- 2521 #if defined HAVE_BIGDFT 2522 !scalars 2523 integer :: ityp, i, mu, nstates, me, nproc, comm 2524 character(len=500) :: msg 2525 real(dp) :: ehomo, radfine 2526 type(wvl_internal_type) :: wvl 2527 type(memory_estimation) :: peakmem 2528 !arrays 2529 real(dp) :: acell(3), rprimd(3,3), rprim(3,3) 2530 real(dp), allocatable :: radii_cf(:,:) 2531 real(dp), allocatable :: xred(:,:), xcart(:,:) 2532 #endif 2533 2534 ! ************************************************************************** 2535 2536 #if defined HAVE_BIGDFT 2537 2538 comm=mpi_enreg%comm_wvl 2539 me=xmpi_comm_rank(comm) 2540 nproc=xmpi_comm_size(comm) 2541 2542 if(option<0 .or. option>2)then 2543 write(msg, '(A,A,A,A,I0,A)') ch10,& 2544 & ' wvl_memory : BUG -',ch10,& 2545 & ' option=',option,' while the only allowed values are 0, 1, or 2.' 2546 call wrtout(std_out,msg) 2547 end if 2548 2549 wvl%paw%usepaw=0 !no PAW here 2550 nullify(wvl%rholoc%d) 2551 nullify(wvl%rholoc%msz) 2552 nullify(wvl%rholoc%rad) 2553 nullify(wvl%rholoc%radius) 2554 nullify(wvl%paw%spsi) 2555 nullify(wvl%paw%indlmn) 2556 nullify(wvl%paw%spsi) 2557 nullify(wvl%paw%indlmn) 2558 2559 write(msg,*)' wvl_memory : analysis of memory needs ' 2560 call wrtout(std_out,msg) 2561 2562 if(idtset>=100)then 2563 write(msg,'(80a,a,a,i5,a)')('=',mu=1,80),ch10,& 2564 & ' Values of the parameters that define the memory need for DATASET', idtset,& 2565 & ' (WVL).' 2566 else if(idtset/=0)then 2567 write(msg,'(80a,a,a,i3,a)')('=',mu=1,80),ch10,& 2568 & ' Values of the parameters that define the memory need for DATASET', idtset,& 2569 & ' (WVL).' 2570 else 2571 write(msg,'(80a,a,a,a)')('=',mu=1,80),ch10,& 2572 & ' Values of the parameters that define the memory need of the present run',& 2573 & ' (WVL).' 2574 end if 2575 call wrtout(ab_out,msg) 2576 call wrtout(std_out,msg) 2577 2578 write(msg,'( a,f7.3,a,i7,2(a,F7.3),a,a,f7.3,a,i7 )' ) & 2579 & ' wvl_hgrid =', dtset%wvl_hgrid , ' nwfshist =', dtset%nwfshist, & 2580 & ' wvl_crmult =', dtset%wvl_crmult, ' wvl_frmult =', dtset%wvl_frmult, ch10,& 2581 & ' tl_radius =', dtset%tl_radius , ' tl_nprccg =', dtset%tl_nprccg 2582 call wrtout(ab_out,msg) 2583 call wrtout(std_out,msg) 2584 2585 if (dtset%nsppol == 2) then 2586 nstates = dtset%nelect 2587 else 2588 nstates = dtset%mband 2589 end if 2590 write(msg,'(4(a,i7))')& 2591 & ' natom =', dtset%natom, ' ntypat =', dtset%ntypat, & 2592 & ' nstates =', nstates, ' nsppol =', dtset%nsppol 2593 call wrtout(ab_out,msg) 2594 call wrtout(std_out,msg) 2595 2596 write(msg,'(80a)') ('=',mu=1,80) 2597 call wrtout(ab_out,msg) 2598 call wrtout(std_out,msg) 2599 2600 !First, use eleconf to get radii_cf(). 2601 ABI_MALLOC(radii_cf,(npsp, 3)) 2602 do ityp = 1, npsp, 1 2603 call atomic_info(int(pspheads(ityp)%znuclpsp), int(pspheads(ityp)%zionpsp), ehomo = ehomo) 2604 2605 ! new method for assigning the radii 2606 radii_cf(ityp, 1) = one / sqrt(abs(two * ehomo)) 2607 radfine = 100.d0 2608 do i = 0, 4, 1 2609 if (pspheads(ityp)%GTHradii(i) /= zero) then 2610 radfine = min(radfine, pspheads(ityp)%GTHradii(i)) 2611 end if 2612 end do 2613 radii_cf(ityp,2) = radfine 2614 end do 2615 2616 !Compute the shifted positions and acell 2617 acell = dtset%acell_orig(1:3,1) 2618 call wvl_descr_atoms_set(acell, dtset%icoulomb, dtset%natom, dtset%ntypat, dtset%typat, wvl) 2619 ABI_MALLOC(xred,(3, dtset%natom)) 2620 xred = dtset%xred_orig(:,:,1) 2621 rprimd = dtset%rprimd_orig(1:3,1:3,1) 2622 wvl%h(:) = dtset%wvl_hgrid 2623 call wvl_setBoxGeometry(1, radii_cf, rprimd, xred, & 2624 & wvl, dtset%wvl_crmult, dtset%wvl_frmult) 2625 !Compute acell and rprim from rprimd 2626 call mkradim(acell,rprim,rprimd) 2627 ABI_MALLOC(xcart,(3, dtset%natom)) 2628 call xred2xcart(dtset%natom, rprimd, xcart, xred) 2629 call createWavefunctionsDescriptors(me, wvl%h(1), wvl%h(2), wvl%h(3), & 2630 & wvl%atoms, xcart, radii_cf, dtset%wvl_crmult, dtset%wvl_frmult, wvl%Glr) 2631 call MemoryEstimator(nproc, dtset%nwfshist, wvl%Glr, & 2632 & dtset%mband, dtset%nspinor, dtset%nkpt, 0, dtset%nsppol, & 2633 & 0, dtset%iscf, peakmem) 2634 2635 call deallocate_lr(wvl%Glr) 2636 call wvl_descr_free(wvl) 2637 ABI_FREE(radii_cf) 2638 ABI_FREE(xred) 2639 ABI_FREE(xcart) 2640 2641 write(msg,'(80a,a)') ('=',mu=1,80), ch10 2642 call wrtout(ab_out,msg) 2643 call wrtout(std_out,msg) 2644 2645 #else 2646 BIGDFT_NOTENABLED_ERROR() 2647 if (.false.) write(std_out,*) idtset,npsp,option,dtset%nstep,mpi_enreg%nproc,pspheads(1)%zionpsp 2648 #endif 2649 2650 end subroutine wvl_memory