TABLE OF CONTENTS
ABINIT/mpi_setup [ Functions ]
NAME
mpi_setup
FUNCTION
Big loop on the datasets : - compute mgfft,mpw,nfft,... for this data set ; - fill mpi_enreg *** At the output of this routine, all the dtsets input variables are known *** The content of dtsets should not be modified anymore afterwards.
COPYRIGHT
Copyright (C) 1999-2018 ABINIT group (FJ,MT) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . For the initials of contributors, see ~abinit/doc/developers/contributors.txt.
INPUTS
filnam(5)=character strings giving file names ndtset= number of datasets to be read; if 0, no multi-dataset mode ndtset_alloc=number of datasets, corrected for allocation of at least one data set.
OUTPUT
dtsets(0:ndtset_alloc)=<type datafiles_type>contains all input variables, some of which are initialized here, while other were already initialized previously.
SIDE EFFECTS
mpi_enregs=informations about MPI parallelization
PARENTS
abinit
CHILDREN
abi_io_redirect,distrb2,distrb2_hf,finddistrproc,get_npert_rbz,getmpw getng,init_distribfft,init_mpi_enreg,initmpi_atom,initmpi_grid initmpi_img,initmpi_pert,intagm,libpaw_write_comm_set,metric,mkrdim wrtout
SOURCE
45 #if defined HAVE_CONFIG_H 46 #include "config.h" 47 #endif 48 49 #include "abi_common.h" 50 51 subroutine mpi_setup(dtsets,filnam,lenstr,mpi_enregs,ndtset,ndtset_alloc,string) 52 53 use defs_basis 54 use defs_abitypes 55 use defs_parameters 56 use m_distribfft 57 use m_xmpi 58 use m_errors 59 use m_profiling_abi 60 61 use m_geometry, only : metric 62 use m_parser, only : intagm 63 use m_geometry, only : mkrdim 64 use m_fftcore, only : fftalg_for_npfft 65 use m_mpinfo, only : init_mpi_enreg,mpi_distrib_is_ok 66 use m_libpaw_tools, only : libpaw_write_comm_set 67 use m_dtset, only : get_npert_rbz 68 use m_kg, only : getmpw 69 70 !This section has been created automatically by the script Abilint (TD). 71 !Do not modify the following lines by hand. 72 #undef ABI_FUNC 73 #define ABI_FUNC 'mpi_setup' 74 use interfaces_14_hidewrite 75 use interfaces_32_util 76 use interfaces_51_manage_mpi 77 use interfaces_52_fft_mpi_noabirule 78 use interfaces_57_iovars, except_this_one => mpi_setup 79 !End of the abilint section 80 81 implicit none 82 83 !Arguments ------------------------------------ 84 !scalars 85 integer,intent(in) :: lenstr,ndtset,ndtset_alloc 86 type(MPI_type),intent(inout) :: mpi_enregs(0:ndtset_alloc) 87 character(len=*),intent(inout) :: string 88 !arrays 89 character(len=fnlen),intent(in) :: filnam(5) 90 type(dataset_type),intent(inout) :: dtsets(0:ndtset_alloc) 91 92 !Local variables ------------------------------- 93 !scalars 94 integer :: blocksize,exchn2n3d,iband,idtset,iexit,ii,iikpt,iikpt_modulo 95 integer :: isppol,jdtset,marr,mband_lower,mband_upper 96 integer :: me_fft,mgfft,mgfftdg,mkmem,mpw,mpw_k,optdriver 97 integer :: nfft,nfftdg,nkpt,nkpt_me,npert,nproc,nproc_fft,nqpt 98 integer :: nspink,nsppol,nsym,paral_fft,response,tnband,tread0,usepaw,vectsize 99 integer :: fftalg,fftalga,fftalgc 100 #ifdef HAVE_LINALG_ELPA 101 integer :: icol,irow,np 102 #endif 103 logical :: fftalg_read,ortalg_read,wfoptalg_read,do_check 104 real(dp) :: dilatmx,ecut,ecut_eff,ecutdg_eff,ucvol 105 character(len=500) :: message 106 !arrays 107 integer :: ngfft(18),ngfftdg(18),ngfftc(3),tread(11) 108 integer,allocatable :: intarr(:),istwfk(:),symrel(:,:,:) 109 integer,pointer :: nkpt_rbz(:) 110 real(dp),parameter :: k0(3)=(/zero,zero,zero/) 111 real(dp) :: gmet(3,3),gprimd(3,3),kpt(3),qphon(3),rmet(3,3),rprimd(3,3) 112 real(dp),allocatable :: dprarr(:),kpt_with_shift(:,:) 113 real(dp),pointer :: nband_rbz(:,:) 114 character(len=6) :: nm_mkmem(3) 115 116 !************************************************************************* 117 118 DBG_ENTER("COLL") 119 120 iexit=0;mpw_k=0 121 122 call init_mpi_enreg(mpi_enregs(0)) 123 call initmpi_img(dtsets(0),mpi_enregs(0),-1) 124 125 do idtset=1,ndtset_alloc 126 call init_mpi_enreg(mpi_enregs(idtset)) 127 128 ! Handy read-only variables. 129 optdriver = dtsets(idtset)%optdriver 130 131 ! Read parallel input parameters 132 marr=max(5,dtsets(idtset)%npsp,dtsets(idtset)%nimage) 133 ABI_ALLOCATE(intarr,(marr)) 134 ABI_ALLOCATE(dprarr,(marr)) 135 nkpt =dtsets(idtset)%nkpt 136 nsppol=dtsets(idtset)%nsppol 137 jdtset=dtsets(idtset)%jdtset ; if(ndtset==0)jdtset=0 138 usepaw=dtsets(idtset)%usepaw 139 mband_upper=maxval(dtsets(idtset)%nband(1:nkpt*nsppol)) 140 mband_lower=minval(dtsets(idtset)%nband(1:nkpt*nsppol)) 141 142 ! Compute metric for this dataset 143 call mkrdim(dtsets(idtset)%acell_orig(1:3,1),dtsets(idtset)%rprim_orig(1:3,1:3,1),rprimd) 144 call metric(gmet,gprimd,-1,rmet,rprimd,ucvol) 145 146 ! Read paral_kgb and disable it if not supported in optdriver. 147 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'paral_kgb',tread(1),'INT') 148 if (tread(1)==1) dtsets(idtset)%paral_kgb=intarr(1) 149 150 if(xmpi_paral==0.and.dtsets(idtset)%paral_kgb==1)then 151 dtsets(idtset)%paral_kgb=0 152 write(message, '(5a)' ) & 153 & 'When ABINIT is compiled without MPI flag,',ch10,& 154 & 'setting paral_kgb/=0 is useless. paral_kgb has been reset to 0.',ch10,& 155 & 'Action : modify compilation option or paral_kgb in the input file.' 156 MSG_WARNING(message) 157 end if 158 159 if ( ALL(optdriver /= [RUNL_GSTATE, RUNL_GWLS]) .and. dtsets(idtset)%paral_kgb/=0) then 160 dtsets(idtset)%paral_kgb=0 161 write(message, '(a,i0,a)') & 162 & "paral_kgb != 0 is not available in optdriver ",optdriver,". Setting paral_kgb to 0" 163 MSG_COMMENT(message) 164 end if 165 166 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'max_ncpus',tread0,'INT') 167 if (tread0==1) dtsets(idtset)%max_ncpus=intarr(1) 168 169 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'paral_atom',tread0,'INT') 170 if(tread0==1) dtsets(idtset)%paral_atom=intarr(1) 171 172 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'paral_rf',tread0,'INT') 173 if (tread0==1.and.optdriver==RUNL_RESPFN) dtsets(idtset)%paral_rf=intarr(1) 174 175 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'npimage',tread(2),'INT') 176 if(tread(2)==1) dtsets(idtset)%npimage=intarr(1) 177 178 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'nppert',tread(3),'INT') 179 if (tread(3)==1.and.optdriver==RUNL_RESPFN) dtsets(idtset)%nppert=intarr(1) 180 181 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'npkpt',tread(4),'INT') 182 if(tread(4)==1) dtsets(idtset)%npkpt=intarr(1) 183 184 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'npspinor',tread(5),'INT') 185 if(tread(5)==1) dtsets(idtset)%npspinor=intarr(1) 186 187 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'npfft',tread(6),'INT') 188 if(tread(6)==1) dtsets(idtset)%npfft=intarr(1) 189 190 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'npband',tread(7),'INT') 191 if(tread(7)==1) dtsets(idtset)%npband=intarr(1) 192 193 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'bandpp',tread(8),'INT') 194 if(tread(8)==1) dtsets(idtset)%bandpp=intarr(1) 195 196 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'use_slk',tread(9),'INT') 197 if(tread(9)==1) dtsets(idtset)%use_slk=intarr(1) 198 199 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'np_slk',tread(10),'INT') 200 if(tread(10)==1) dtsets(idtset)%np_slk=intarr(1) 201 202 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'pw_unbal_thresh',tread0,'DPR') 203 if(tread0==1) dtsets(idtset)%pw_unbal_thresh=dprarr(1) 204 mpi_enregs(idtset)%pw_unbal_thresh=dtsets(idtset)%pw_unbal_thresh 205 206 call intagm(dprarr,intarr,jdtset,marr,5,string(1:lenstr),'gpu_devices',tread0,'INT') 207 if(tread0==1) dtsets(idtset)%gpu_devices(1:5)=intarr(1:5) 208 209 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'gpu_linalg_limit',tread(11),'INT') 210 if(tread(11)==1) dtsets(idtset)%gpu_linalg_limit=intarr(1) 211 212 213 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'nphf',tread0,'INT') 214 if(tread0==1) dtsets(idtset)%nphf=intarr(1) 215 216 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'autoparal',tread0,'INT') 217 if(tread0==1) dtsets(idtset)%autoparal=intarr(1) 218 219 ! Dump the list of irreducible perturbations and exit. 220 if (dtsets(idtset)%paral_rf==-1) then 221 call get_npert_rbz(dtsets(idtset),nband_rbz,nkpt_rbz,npert) 222 ABI_DEALLOCATE(nband_rbz) 223 ABI_DEALLOCATE(nkpt_rbz) 224 iexit = iexit + 1 225 end if 226 227 ! From total number of procs, compute all possible distributions 228 ! Ignore exit flag if GW/EPH calculations because autoparal section is performed in screening/sigma/bethe_salpeter/eph 229 call finddistrproc(dtsets,filnam,idtset,iexit,mband_upper,mpi_enregs(idtset),ndtset_alloc,tread) 230 if (any(optdriver == [RUNL_SCREENING, RUNL_SIGMA, RUNL_BSE, RUNL_EPH])) iexit = 0 231 232 if ((optdriver/=RUNL_GSTATE.and.optdriver/=RUNL_GWLS).and. & 233 & (dtsets(idtset)%npkpt/=1 .or.dtsets(idtset)%npband/=1.or.dtsets(idtset)%npfft/=1.or. & 234 & dtsets(idtset)%npspinor/=1.or.dtsets(idtset)%bandpp/=1)) then 235 !& .or.(dtsets(idtset)%iscf<0)) then 236 dtsets(idtset)%npkpt=1 ; dtsets(idtset)%npspinor=1 ; dtsets(idtset)%npfft=1 237 dtsets(idtset)%npband=1; dtsets(idtset)%bandpp=1 ; dtsets(idtset)%nphf=1 238 dtsets(idtset)%paral_kgb=0 239 message = 'For non ground state calculation, set bandpp, npfft, npband, npspinor npkpt and nphf to 1' 240 MSG_WARNING(message) 241 end if 242 243 ! Read again some input data to take into account a possible change of paral_kgb 244 wfoptalg_read=.false. 245 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'wfoptalg',tread0,'INT') 246 if(tread0==1) then 247 dtsets(idtset)%wfoptalg=intarr(1) 248 wfoptalg_read=.true. 249 else 250 if (dtsets(idtset)%usepaw==0) dtsets(idtset)%wfoptalg=0 251 if (dtsets(idtset)%usepaw/=0) dtsets(idtset)%wfoptalg=10 252 if ((optdriver==RUNL_GSTATE.or.optdriver==RUNL_GWLS).and.dtsets(idtset)%paral_kgb/=0) dtsets(idtset)%wfoptalg=114 253 if (mod(dtsets(idtset)%wfoptalg,10)==4) then 254 do iikpt=1,dtsets(idtset)%nkpt 255 if (any(abs(dtsets(idtset)%kpt(:,iikpt))>tol8)) dtsets(idtset)%istwfk(iikpt)=1 256 end do 257 end if 258 end if 259 260 dtsets(idtset)%densfor_pred=2 261 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'densfor_pred',tread0,'INT') 262 if(tread0==1) then 263 dtsets(idtset)%densfor_pred=intarr(1) 264 else 265 if (dtsets(idtset)%paral_kgb==1) dtsets(idtset)%densfor_pred=6 266 end if 267 if((dtsets(idtset)%iscf==5.or.dtsets(idtset)%iscf==6) & 268 & .and. dtsets(idtset)%ionmov==4 .and. dtsets(idtset)%densfor_pred/=3 )then 269 dtsets(idtset)%densfor_pred=3 270 write(message, '(a,a,a)' )& 271 & 'When ionmov==4 and iscf==5 or 6, densfor_pred must be 3.',ch10,& 272 & 'Set densfor_pred to 3.' 273 MSG_COMMENT(message) 274 end if 275 276 #ifdef HAVE_LOTF 277 ! LOTF need densfor_pred=2 278 if(dtsets(idtset)%ionmov==23) dtsets(idtset)%densfor_pred=2 279 #endif 280 281 if (usepaw==0) then 282 dtsets(idtset)%ortalg=2 283 else 284 dtsets(idtset)%ortalg=-2 285 end if 286 ortalg_read=.false. 287 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'ortalg',tread0,'INT') 288 if(tread0==1) then 289 dtsets(idtset)%ortalg=intarr(1) 290 ortalg_read=.true. 291 else if (dtsets(idtset)%wfoptalg>=10 .and. dtsets(idtset)%ortalg>0) then 292 dtsets(idtset)%ortalg=-dtsets(idtset)%ortalg 293 end if 294 295 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'iomode',tread0,'INT') 296 if(tread0==1) then 297 dtsets(idtset)%iomode=intarr(1) 298 else 299 if ((xmpi_mpiio==1).and.(dtsets(idtset)%paral_kgb==1)) dtsets(idtset)%iomode=IO_MODE_MPI 300 #ifdef HAVE_NETCDF_DEFAULT 301 dtsets(idtset)%iomode=IO_MODE_ETSF 302 #endif 303 end if 304 305 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'pawmixdg',tread0,'INT') 306 if(tread0==1) then 307 dtsets(idtset)%pawmixdg=intarr(1) 308 else if (dtsets(idtset)%npfft>1.and.usepaw==1) then 309 dtsets(idtset)%pawmixdg=1 310 end if 311 312 call initmpi_img(dtsets(idtset),mpi_enregs(idtset),-1) 313 nproc=mpi_enregs(idtset)%nproc_cell 314 315 ! Cycle if the processor is not used 316 if (mpi_enregs(idtset)%me<0) then 317 ABI_DEALLOCATE(intarr) 318 ABI_DEALLOCATE(dprarr) 319 cycle 320 end if 321 322 response=0 323 if (dtsets(idtset)%rfddk/=0 .or. dtsets(idtset)%rf2_dkdk/=0 .or. dtsets(idtset)%rf2_dkde/=0 .or. & 324 & dtsets(idtset)%rfelfd/=0 .or. dtsets(idtset)%rfphon/=0 .or. dtsets(idtset)%rfstrs/=0 .or. & 325 & dtsets(idtset)%rfuser/=0 .or. dtsets(idtset)%rfmagn/=0) response=1 326 327 ! If no MPI, set all npxxx variables to 1 328 if (nproc==1) then 329 dtsets(idtset)%npkpt = 1 ; dtsets(idtset)%npband = 1 330 dtsets(idtset)%npfft = 1 ; dtsets(idtset)%npspinor = 1 331 dtsets(idtset)%nphf = 1 332 end if 333 334 ! --IF CUDA AND RECURSION:ONLY BAND PARALLELISATION 335 if(dtsets(idtset)%tfkinfunc==2 .and. nproc/=1)then 336 dtsets(idtset)%npband = dtsets(idtset)%npband*dtsets(idtset)%npkpt*dtsets(idtset)%npspinor*dtsets(idtset)%npfft 337 dtsets(idtset)%npkpt = 1 338 dtsets(idtset)%npfft = 1 339 dtsets(idtset)%npspinor = 1 340 write(message, '(5a,i6,a)' )& 341 & 'If HAVE_GPU_CUDA and recursion are used ',ch10,& 342 & 'only the band parallelisation is active, we set:',ch10,& 343 & 'npfft= 1, npkpt= 1, npband=',dtsets(idtset)%npband,' .' 344 MSG_WARNING(message) 345 end if 346 347 if (dtsets(idtset)%npspinor>=2.and.dtsets(idtset)%nspinor==1) then 348 dtsets(idtset)%npspinor=1 349 dtsets(idtset)%npfft=2*dtsets(idtset)%npfft 350 write(message,'(3a)')& 351 & 'npspinor is bigger than nspinor !',ch10,& 352 & 'We set npspinor to 1 ; we set npfft to 2*npfft' 353 MSG_WARNING(message) 354 end if 355 356 ! Some checks on parallelization data 357 if(dtsets(idtset)%paral_kgb < 0 ) then 358 cycle 359 else if(dtsets(idtset)%paral_kgb/=0.and.(dtsets(idtset)%bandpp/=1.or.dtsets(idtset)%npband/=1.or.& 360 & dtsets(idtset)%npfft/=1.or.dtsets(idtset)%npkpt/=1.or.dtsets(idtset)%npspinor/=1))then 361 if(dtsets(idtset)%npkpt*dtsets(idtset)%npfft*dtsets(idtset)%npband*dtsets(idtset)%npspinor > nproc )then 362 write(message,'(7a)')& 363 & 'The product of npkpt, npfft, npband and npspinor is bigger than the number of processors.',ch10,& 364 & 'The user-defined values of npkpt, npfft, npband or npspinor will be modified,',ch10,& 365 & 'in order to bring this product below nproc .',ch10,& 366 & 'At present, only a very simple algorithm is used ...' 367 MSG_WARNING(message) 368 369 if(dtsets(idtset)%npkpt*dtsets(idtset)%npband*dtsets(idtset)%npspinor <= nproc) then 370 dtsets(idtset)%npfft=1 371 MSG_WARNING('Set npfft to 1') 372 else if(dtsets(idtset)%npkpt*dtsets(idtset)%npspinor <= nproc)then 373 dtsets(idtset)%npfft=1 374 dtsets(idtset)%npband=1 375 MSG_WARNING('Set npfft and npband to 1') 376 else if(dtsets(idtset)%npkpt <= nproc)then 377 dtsets(idtset)%npfft=1 378 dtsets(idtset)%npband=1 379 dtsets(idtset)%npspinor=1 380 MSG_WARNING('Set npfft ,npband and npspinor to 1') 381 else 382 dtsets(idtset)%npfft=1 383 dtsets(idtset)%npband=1 384 dtsets(idtset)%npkpt=1 385 dtsets(idtset)%npspinor=1 386 MSG_WARNING('Set npfft, npband, nspinor and npkpt to 1') 387 end if 388 else if(dtsets(idtset)%npkpt*dtsets(idtset)%npfft*dtsets(idtset)%npband*dtsets(idtset)%npspinor < nproc)then 389 write(message,'(2a)')& 390 & 'The number of processor must not be greater than npfft*npband*npkpt*npsinor ',& 391 & 'when npfft or npkpt or npband or nspinor are chosen manually in the input file.' 392 MSG_ERROR(message) 393 end if 394 end if 395 396 ! LOBPCG and ChebFi need paral_kgb=1 in parallel 397 if ((dtsets(idtset)%npband*dtsets(idtset)%npfft>1).and. & 398 & (mod(dtsets(idtset)%wfoptalg,10)==1.or.mod(dtsets(idtset)%wfoptalg,10)==4)) then 399 dtsets(idtset)%paral_kgb=1 400 end if 401 402 ! Check size of Scalapack communicator 403 #ifdef HAVE_LINALG_ELPA 404 if(dtsets(idtset)%paral_kgb>0.and.dtsets(idtset)%np_slk>0) then 405 np=min(dtsets(idtset)%np_slk,dtsets(idtset)%npband*dtsets(idtset)%npfft*dtsets(idtset)%npspinor) 406 irow=int(sqrt(float(np))) 407 do while(mod(np,irow)/=0) 408 irow=irow-1 409 end do 410 icol=nproc/irow 411 if (icol>mband_lower) then 412 do while(icol>mband_lower) 413 icol=icol-1 414 do while(mod(np,icol)/=0) 415 icol=icol-1 416 end do 417 end do 418 dtsets(idtset)%np_slk=icol 419 write(message,'(5a,i6,a)')& 420 & 'The number of band*fft*spinor processors was not consistent with',ch10,& 421 & 'the size of communicator used for ELPA library (np_slk).',ch10,& 422 & 'np_slk value has been adjusted to ',dtsets(idtset)%np_slk,'.' 423 MSG_COMMENT(message) 424 end if 425 end if 426 #endif 427 428 !Additional check in case of a parallelized Hartree-Fock calculation 429 ! %usefock == option to perform Fock exchange calculation 430 ! %nphf == number of processors for Fock exchange calculation 431 if ((dtsets(idtset)%usefock==1).and.(dtsets(idtset)%nphf/=1)) then 432 433 if ((dtsets(idtset)%nphf<0).or.(dtsets(idtset)%nphf==0)) then 434 MSG_ERROR('The value of variable nphf should be a non negative integer.') 435 end if 436 if (dtsets(idtset)%paral_kgb/=0) then 437 message = 'Option paral_kgb should be turned off (value 0) for a parallelized Hartree-Fock calculation.' 438 MSG_ERROR(message) 439 end if 440 if (response/=0) then 441 message = 'A response function calculation is not yet possible with a parallelized Hartree-Fock calculation.' 442 MSG_ERROR(message) 443 end if 444 if (dtsets(idtset)%npspinor>1) then 445 message = 'The parallelism on spinors is not supported by a parallelized Hartree-Fock calculation.' 446 MSG_ERROR(message) 447 end if 448 if (dtsets(idtset)%npkpt*dtsets(idtset)%nphf > nproc )then 449 write(message,'(a,3(a,i0))') ch10,& 450 & 'The product of variables npkpt and nphf is bigger than the number of processors: nkpt= ',& 451 & dtsets(idtset)%npkpt,' nphf= ',dtsets(idtset)%nphf ,' and nproc= ', nproc 452 MSG_ERROR(message) 453 end if 454 end if ! Fock 455 456 !When using chebfi, the number of blocks is equal to the number of processors 457 if((dtsets(idtset)%wfoptalg == 1)) then 458 !Nband might have different values for different kpoint, but not bandpp. 459 !In this case, we just use the largest nband, andthe input will probably fail 460 !at the bandpp check later on 461 dtsets(idtset)%bandpp = mband_upper / dtsets(idtset)%npband 462 end if 463 464 ! Set mpi_enreg 465 mpi_enregs(idtset)%paral_kgb=dtsets(idtset)%paral_kgb 466 if(dtsets(idtset)%paral_kgb/=0)then 467 mpi_enregs(idtset)%nproc_kpt=dtsets(idtset)%npkpt 468 mpi_enregs(idtset)%nproc_fft=dtsets(idtset)%npfft 469 mpi_enregs(idtset)%nproc_band=dtsets(idtset)%npband 470 mpi_enregs(idtset)%nproc_spinor=min(dtsets(idtset)%npspinor,dtsets(idtset)%nspinor) 471 mpi_enregs(idtset)%bandpp=dtsets(idtset)%bandpp 472 ! Additional setting in case of hybrid functional calculation => not yet tested (CMartins) 473 ! if (dtsets(idtset)%usefock==1) then 474 ! mpi_enregs(idtset)%nproc_hf = dtsets(idtset)%nphf 475 ! if (dtsets(idtset)%nphf>1) mpi_enregs(idtset)%paral_hf=1 476 ! end if 477 else 478 mpi_enregs(idtset)%bandpp = dtsets(idtset)%bandpp 479 ! Additional setting in case of a Fock exchange of PBE0 calculation 480 if (dtsets(idtset)%usefock==1) then 481 if (dtsets(idtset)%nphf>1) mpi_enregs(idtset)%paral_hf=1 482 mpi_enregs(idtset)%nproc_hf = dtsets(idtset)%nphf 483 if (dtsets(idtset)%npkpt/=1) then 484 mpi_enregs(idtset)%nproc_kpt = dtsets(idtset)%npkpt 485 else 486 mpi_enregs(idtset)%nproc_kpt = mpi_enregs(idtset)%nproc_cell/mpi_enregs(idtset)%nproc_hf 487 end if 488 else 489 mpi_enregs(idtset)%nproc_kpt = mpi_enregs(idtset)%nproc_cell 490 end if 491 end if 492 493 if(dtsets(idtset)%paral_kgb>=0) then 494 495 ! Compute processor distribution over perturbations 496 mpi_enregs(idtset)%paral_pert=dtsets(idtset)%paral_rf 497 if (mpi_enregs(idtset)%paral_pert==1) then 498 dtsets(idtset)%nppert=max(1,dtsets(idtset)%nppert) 499 if(dtsets(idtset)%nppert>mpi_enregs(idtset)%nproc) then 500 message=' The number of processors must not be smaller than nppert !' 501 MSG_ERROR(message) 502 end if 503 call initmpi_pert(dtsets(idtset),mpi_enregs(idtset)) 504 mpi_enregs(idtset)%nproc_kpt = mpi_enregs(idtset)%nproc_cell 505 nproc=mpi_enregs(idtset)%nproc_cell 506 end if 507 ! Cycle if the processor is not used 508 if (mpi_enregs(idtset)%me<0) then 509 ABI_DEALLOCATE(intarr) 510 ABI_DEALLOCATE(dprarr) 511 cycle 512 end if 513 514 ! Compute processor distribution over kpt (and eventually band-fft) 515 call initmpi_grid(mpi_enregs(idtset)) 516 if(dtsets(idtset)%usewvl==1) mpi_enregs(idtset)%comm_fft=mpi_enregs(idtset)%comm_cell 517 518 ! Initialize tabs used for k/spin parallelism (with sequential-type values) 519 ABI_ALLOCATE(mpi_enregs(idtset)%proc_distrb,(nkpt,mband_upper,nsppol)) 520 ABI_ALLOCATE(mpi_enregs(idtset)%my_kpttab,(nkpt)) 521 mpi_enregs(idtset)%proc_distrb(:,:,:)=0 522 mpi_enregs(idtset)%my_kpttab(:)=(/(ii,ii=1,nkpt)/) 523 mpi_enregs(idtset)%my_isppoltab(:)=1;if (dtsets(idtset)%nsppol==1) mpi_enregs(idtset)%my_isppoltab(2)=0 524 525 ! HF or hybrid calculation : initialization of the array distrb_hf 526 if (dtsets(idtset)%usefock==1) then 527 ABI_ALLOCATE(mpi_enregs(idtset)%distrb_hf,(dtsets(idtset)%nkpthf,dtsets(idtset)%nbandhf,1)) 528 ! The dimension of distrb_hf are given by %nkpthf and %nbandhf. 529 ! We assume that there will be no dependence in spinpol for all the occupied states. 530 mpi_enregs(idtset)%distrb_hf=0 531 end if 532 533 ! Define k-points distribution (determine who I am) 534 ! Note that nkpt_me may differ from processor to processor 535 ! This fact will NOT be taken into account when 536 ! the memory needs will be evaluated in the subroutine memory. 537 ! Also, the reduction of k points due to symmetry in RF calculations 538 ! is NOT taken into account. This should be changed later ... 539 nkpt_me=nkpt 540 if(xmpi_paral==1 .and. dtsets(idtset)%usewvl == 0) then 541 nkpt_me=0 542 if(response==0 .or. (response==1 .and. dtsets(idtset)%efmas==1))then 543 mpi_enregs(idtset)%paralbd=0 544 call distrb2(mband_upper,dtsets(idtset)%nband,nkpt,nproc,nsppol,mpi_enregs(idtset)) 545 do iikpt=1,nkpt 546 if(.not.(proc_distrb_cycle(mpi_enregs(idtset)%proc_distrb,iikpt,1,1,-1,mpi_enregs(idtset)%me_kpt)))& 547 & nkpt_me=nkpt_me+1 548 end do ! ikpt=1,nkpt 549 ! HF or hybrid calculation : define the occupied states distribution (in array distrb_hf) 550 if (dtsets(idtset)%usefock==1) then 551 call distrb2_hf(dtsets(idtset)%nbandhf,dtsets(idtset)%nkpthf,nproc,nsppol,mpi_enregs(idtset)) 552 end if 553 else ! response==1 554 ! Wrongly assumes that the number of elements of the 555 ! k-point sets of the two spin polarizations is the maximal 556 ! value of one of these k-point sets ... 557 ! This is to be corrected when RF is implemented 558 ! for spin-polarized case. 559 mpi_enregs(idtset)%paralbd=1 560 ! nproc=mpi_enregs(idtset)%nproc_cell*mpi_enregs(idtset)%nproc_pert 561 call distrb2(mband_upper,dtsets(idtset)%nband,nkpt,nproc,nsppol,mpi_enregs(idtset)) 562 do isppol=1,nsppol 563 nspink=0 564 do iikpt=1,nkpt 565 do iband=1,dtsets(idtset)%nband(iikpt+(isppol-1)*nkpt) 566 if(mpi_enregs(idtset)%proc_distrb(iikpt,iband,isppol)==mpi_enregs(idtset)%me_cell)then 567 nspink=nspink+1 568 exit 569 end if 570 end do ! iband 571 end do ! iikpt 572 if(nspink>nkpt_me)nkpt_me=nspink 573 end do ! isppol 574 ! Is nband present in input file or automatically estimated ? 575 tnband=0 576 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'nband',tnband,'INT') 577 ! If the number of bands was estimated, there might be a side effect 578 ! when the definitive number of bands is known. k points 579 ! might be attributed to different processors than the present 580 ! proc_distrb describes. At most, the number of k points could increase by 1 ... 581 if(tnband==0)nkpt_me=nkpt_me+1 582 ! In any case, the maximal number of k points is nkpt 583 if(nkpt_me>nkpt)nkpt_me=nkpt 584 end if 585 end if 586 end if 587 588 ! Take care of mkmems. Use the generic name -mkmem- for mkmem as well as mkqmem 589 ! and mk1mem. 590 nm_mkmem(1)='mkmem ' 591 nm_mkmem(2)='mkqmem' 592 nm_mkmem(3)='mk1mem' 593 594 do ii=1,3 595 596 ! Read in mkmem here if it is in the input file 597 if(ii==1)then 598 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'mkmem',tread0,'INT') 599 else if(ii==2)then 600 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'mkqmem',tread0,'INT') 601 else if(ii==3)then 602 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'mk1mem',tread0,'INT') 603 end if 604 605 ! Note that mkmem is used as a dummy variable, representing mkmem as well 606 ! as mkqmem, and mk1mem. 607 if(tread0==1) then 608 mkmem=intarr(1) 609 if (mkmem<0) then 610 ! mkmem is unreasonable; must be zero or positive 611 write(message, '(4a,i0,4a)')& 612 & nm_mkmem(ii),' must be positive or null but ',nm_mkmem(ii),' =',mkmem,ch10,& 613 & 'Use default ',nm_mkmem(ii),' = nkpt .' 614 MSG_WARNING(message) 615 mkmem=nkpt 616 end if 617 618 else 619 620 ! mkmem was not set in the input file so default to incore solution 621 write(message,'(6a)') & 622 & 'mpi_setup: ',nm_mkmem(ii),' undefined in the input file.','Use default ',nm_mkmem(ii),' = nkpt' 623 call wrtout(std_out,message,'COLL') 624 mkmem=nkpt 625 end if 626 627 ! Check whether nkpt distributed on the processors <= mkmem; 628 ! if so then may run entirely in core, 629 ! avoiding i/o to disk for wavefunctions and kg data. 630 ! mkmem/=0 to avoid i/o; mkmem==0 to use disk i/o for nkpt>=1. 631 if (nkpt_me<=mkmem .and. mkmem/=0 ) then 632 write(message, '(a,i0,a,a,a,i0,a)' ) & 633 & ' mpi_setup: With nkpt_me=',nkpt_me,' and ',nm_mkmem(ii),' = ',mkmem,', ground state wf handled in core.' 634 call wrtout(std_out,message,'COLL') 635 if(nkpt_me<mkmem .and. nkpt_me/=0)then 636 write(message,'(3a)')' Resetting ',nm_mkmem(ii),' to nkpt_me to save memory space.' 637 mkmem=nkpt_me 638 call wrtout(std_out,message,'COLL') 639 end if 640 else if(mkmem/=0)then 641 write(message, '(a,i0,3a,i0,5a)' ) & 642 & ' mpi_setup: With nkpt_me=',nkpt_me,'and ',nm_mkmem(ii),' = ',mkmem,& 643 & ' ground state wf require disk i/o.',ch10,& 644 & ' Resetting ',nm_mkmem(ii),' to zero to save memory space.' 645 mkmem=0 646 call wrtout(std_out,message,'COLL') 647 end if 648 if(dtsets(idtset)%usewvl == 0 .or. dtsets(idtset)%usepaw==1)then 649 if(ii==1)dtsets(idtset)%mkmem=mkmem 650 end if 651 if(ii==2)dtsets(idtset)%mkqmem=mkmem 652 if(ii==3)dtsets(idtset)%mk1mem=mkmem 653 654 if(dtsets(idtset)%usewvl == 1 .and. dtsets(idtset)%usepaw==1 )then 655 if(dtsets(idtset)%mkmem .ne. dtsets(idtset)%nkpt) then 656 MSG_ERROR("mkmem is not allowed for WVL+PAW") 657 end if 658 end if 659 660 end do ! End the loop on the three possiblities mkmem, mkqmem, mk1mem. 661 662 if(dtsets(idtset)%paral_kgb==1) mpi_enregs(idtset)%paralbd=0 663 664 ! Check if some MPI processes are empty (MBPT code uses a complete different MPI algorithm) 665 do_check = all(optdriver /= [RUNL_SCREENING, RUNL_SIGMA, RUNL_BSE]) 666 if (dtsets(idtset)%usewvl == 0 .and. do_check) then 667 if (.not.mpi_distrib_is_ok(mpi_enregs(idtset),mband_upper,& 668 & dtsets(idtset)%nkpt,dtsets(idtset)%mkmem,nsppol,msg=message)) then 669 write(message,'(5a)') trim(message),ch10,& 670 & 'YOU ARE STRONGLY ADVICED TO ACTIVATE AUTOMATIC PARALLELIZATION!',ch10,& 671 & 'PUT "AUTOPARAL=1" IN THE INPUT FILE.' 672 MSG_WARNING(message) 673 end if 674 end if 675 676 ! call mpi_setup1(dtsets(idtset),jdtset,lenstr,mband_upper,mpi_enregs(idtset),string) 677 ! Printing of processor distribution 678 ! MPIWF : here, set up the complete ngfft, containing the information 679 ! for the parallelisation of the FFT 680 call abi_io_redirect(new_io_comm=mpi_enregs(idtset)%comm_world) 681 call libpaw_write_comm_set(mpi_enregs(idtset)%comm_world) 682 683 ! Default values for sequential case 684 paral_fft=0; nproc_fft=1; me_fft=0 685 686 if(dtsets(idtset)%usewvl == 0)then 687 if(optdriver==RUNL_GSTATE.or.optdriver==RUNL_GWLS) then 688 paral_fft=1 ! parallelisation over FFT 689 if (mpi_enregs(idtset)%nproc_cell>0) then 690 if(mpi_enregs(idtset)%paral_kgb == 1) then 691 692 if((dtsets(idtset)%use_gpu_cuda==1).and.(mpi_enregs(idtset)%nproc_fft/=1))then 693 write(message,'(3a,i0)') & 694 & 'When use_gpu_cuda is on, the number of FFT processors, npfft, must be 1',ch10,& 695 & 'However, npfft=',mpi_enregs(idtset)%nproc_fft 696 MSG_ERROR(message) 697 end if 698 699 if(modulo(dtsets(idtset)%ngfft(2),mpi_enregs(idtset)%nproc_fft)/=0)then 700 write(message,'(3a,i0,a,i0)') & 701 & 'The number of FFT processors, npfft, should be a multiple of ngfft(2).',ch10,& 702 & 'However, npfft=',mpi_enregs(idtset)%nproc_fft,' and ngfft(2)=',dtsets(idtset)%ngfft(2) 703 MSG_BUG(message) 704 end if 705 706 do iikpt=1,nkpt*nsppol 707 iikpt_modulo = modulo(iikpt,nkpt)+1 708 if ((dtsets(idtset)%istwfk(iikpt_modulo)==2)) then !.and.(dtsets(idtset)%ngfft(7)==401)) then 709 if ((mpi_enregs(idtset)%bandpp==0).or. & 710 ((mpi_enregs(idtset)%bandpp/=1).and.(modulo(mpi_enregs(idtset)%bandpp,2)/=0))) then 711 write(message,'(3a,i0)') & 712 & 'The number bandpp should be 1 or a multiple of 2',ch10,& 713 & 'However, bandpp=',mpi_enregs(idtset)%bandpp 714 MSG_BUG(message) 715 end if 716 if(modulo(dtsets(idtset)%nband(iikpt),mpi_enregs(idtset)%nproc_band*mpi_enregs(idtset)%bandpp)/=0)then 717 write(message,'(3a,i0,a,i0)') & 718 & 'The number of bands for the k-point, nband_k, should be a multiple of nproc_band*bandpp.',ch10,& 719 & 'However, nband_k=',dtsets(idtset)%nband(iikpt),' and nproc_band*bandpp=', & 720 & mpi_enregs(idtset)%nproc_band* mpi_enregs(idtset)%bandpp 721 MSG_BUG(message) 722 end if 723 else if ((dtsets(idtset)%istwfk(iikpt_modulo)==2) .and. (dtsets(idtset)%ngfft(7)==400)) then 724 MSG_BUG('The fftalg=400 with istwfk=2 is not valid') 725 else 726 if(modulo(dtsets(idtset)%nband(iikpt),mpi_enregs(idtset)%nproc_band*mpi_enregs(idtset)%bandpp)/=0)then 727 write(message,'(3a,i0,a,i0)') & 728 & 'The number of band for the k-point, nband_k, should be a multiple of nproc_band*bandpp.',ch10,& 729 & 'However, nband_k=',dtsets(idtset)%nband(iikpt),' and nproc_band*bandpp=', & 730 & mpi_enregs(idtset)%nproc_band* mpi_enregs(idtset)%bandpp 731 MSG_BUG(message) 732 end if 733 if ((mpi_enregs(idtset)%bandpp==0)) then 734 write(message,'(a,i0,2a,i0,2a,i0)')& 735 & 'The number bandpp should not be 0 with fftalg=',dtsets(idtset)%ngfft(7),ch10,& 736 & 'and istwfk=',dtsets(idtset)%istwfk(iikpt_modulo),ch10,& 737 & 'However, bandpp=',mpi_enregs(idtset)%bandpp 738 MSG_BUG(message) 739 end if 740 end if 741 end do 742 743 if (xmpi_paral==1) then 744 if(modulo(nkpt*nsppol,mpi_enregs(idtset)%nproc_kpt)/=0)then 745 write(message,'(3a,i0,a,i0)') & 746 & 'The number of KPT processors, npkpt, should be a multiple of nkpt*nsppol.',ch10,& 747 & 'However, npkpt=',mpi_enregs(idtset)%nproc_kpt,' and nkpt*nsppol=',nkpt*nsppol 748 MSG_WARNING(message) 749 end if 750 end if 751 else 752 do iikpt=1,nkpt*nsppol 753 iikpt_modulo = modulo(iikpt,nkpt)+1 754 if(modulo(dtsets(idtset)%nband(iikpt),mpi_enregs(idtset)%nproc_band*mpi_enregs(idtset)%bandpp)/=0)then 755 write(message,'(3a,i0,a,i0)') & 756 & 'The number of band for the k-point, nband_k, should be a multiple of npband*bandpp.',ch10,& 757 & 'However, nband_k=',dtsets(idtset)%nband(iikpt),' and npband*bandpp=', & 758 & mpi_enregs(idtset)%nproc_band* mpi_enregs(idtset)%bandpp 759 MSG_BUG(message) 760 end if 761 end do 762 end if 763 end if 764 nproc_fft=mpi_enregs(idtset)%nproc_fft 765 me_fft=mpi_enregs(idtset)%me_fft 766 end if 767 end if 768 769 ! Compute mgfft,mpw,nfft for this data set (it is dependent of mpi_enreg) 770 ABI_ALLOCATE(istwfk,(nkpt)) 771 ABI_ALLOCATE(kpt_with_shift,(3,nkpt)) 772 773 ! Set the default value of fftalg for given npfft but allow the user to override it. 774 ! Warning: If you need to change npfft, **DO IT** before this point so that here we get the correct fftalg 775 dtsets(idtset)%ngfft(7) = fftalg_for_npfft(dtsets(idtset)%npfft) 776 dtsets(idtset)%ngfftdg(7) = fftalg_for_npfft(dtsets(idtset)%npfft) 777 778 fftalg_read=.false. 779 call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'fftalg',tread0,'INT') 780 781 if (tread0==1) then 782 dtsets(idtset)%ngfft(7)=intarr(1) 783 if (usepaw==1) dtsets(idtset)%ngfftdg(7)=intarr(1) 784 fftalg_read=.true. 785 end if 786 787 ecut =dtsets(idtset)%ecut 788 dilatmx =dtsets(idtset)%dilatmx 789 ngfft(:) =dtsets(idtset)%ngfft(:) 790 istwfk(:)=dtsets(idtset)%istwfk(1:nkpt) 791 nsym =dtsets(idtset)%nsym 792 793 nqpt=dtsets(idtset)%nqpt 794 qphon(:)=zero;if(nqpt/=0) qphon(:)=dtsets(idtset)%qptn(:) 795 796 ABI_ALLOCATE(symrel,(3,3,nsym)) 797 symrel(:,:,1:nsym)=dtsets(idtset)%symrel(:,:,1:nsym) 798 ecut_eff=ecut*dilatmx**2 799 800 if (usepaw==1) then 801 call wrtout(std_out,'getng is called for the coarse grid:','COLL') 802 end if 803 kpt=k0; if (response==1.and.usepaw==1) kpt=qphon ! this is temporary 804 805 call getng(dtsets(idtset)%boxcutmin,ecut_eff,gmet,kpt,me_fft,mgfft,nfft,& 806 & ngfft,nproc_fft,nsym,paral_fft,symrel,& 807 & use_gpu_cuda=dtsets(idtset)%use_gpu_cuda) 808 809 dtsets(idtset)%ngfft(:)=ngfft(:) 810 dtsets(idtset)%mgfft=mgfft 811 dtsets(idtset)%nfft=nfft 812 kpt_with_shift(:,:)=dtsets(idtset)%kpt(:,1:nkpt)/dtsets(idtset)%kptnrm 813 814 exchn2n3d=dtsets(idtset)%exchn2n3d 815 nproc_fft=ngfft(10) ; me_fft=ngfft(11) 816 fftalg=ngfft(7); fftalga=fftalg/100; fftalgc=mod(fftalg,10) 817 818 ! Initialize tables for MPI-FFT. 819 call init_distribfft(mpi_enregs(idtset)%distribfft,'c',mpi_enregs(idtset)%nproc_fft,ngfft(2),ngfft(3)) 820 821 if(response/=0)then 822 ! This value of mpw is used in the first part of respfn.f 823 call getmpw(ecut_eff,exchn2n3d,gmet,istwfk,kpt_with_shift,mpi_enregs(idtset),mpw_k,nkpt) 824 end if 825 if(nqpt/=0)then 826 kpt_with_shift(1,:)=kpt_with_shift(1,:)+qphon(1) 827 kpt_with_shift(2,:)=kpt_with_shift(2,:)+qphon(2) 828 kpt_with_shift(3,:)=kpt_with_shift(3,:)+qphon(3) 829 end if 830 if (dtsets(idtset)%usewvl == 0) then 831 call getmpw(ecut_eff,exchn2n3d,gmet,istwfk,kpt_with_shift,mpi_enregs(idtset),mpw,nkpt) 832 ! Allocate tables for parallel IO of the wavefunctions. 833 if( xmpi_mpiio==1 .and. mpi_enregs(idtset)%paral_kgb == 1 .and. & 834 & any(dtsets(idtset)%iomode == [IO_MODE_MPI, IO_MODE_ETSF])) then 835 ABI_ALLOCATE(mpi_enregs(idtset)%my_kgtab,(mpw,dtsets(idtset)%mkmem)) 836 end if 837 else 838 mpw = 0 839 end if 840 841 ! The dimensioning, in the RF case, should be done only with mpw, 842 ! but mpw is used in the first part of respfn.f, and should at least 843 ! be equal to mpw_k . The chosen way to code is not optimal, only convenient : 844 ! it leads to a small waste of memory. 845 if(response/=0 .and. mpw_k>mpw)mpw=mpw_k 846 dtsets(idtset)%ngfft(:)=ngfft(:) 847 848 ! Initialize ngfftc to the initial guess for the coarse mesh 849 ngfftc(:) = 2 850 851 ! In case of PAW, compute fine FFT parameters 852 if (usepaw==1) then 853 ecutdg_eff=dtsets(idtset)%pawecutdg*dtsets(idtset)%dilatmx**2 854 ngfftdg(:)=dtsets(idtset)%ngfftdg(:) 855 call wrtout(std_out,'getng is called for the fine grid:','COLL') 856 ! Start with the coarse mesh as an initial guess for the fine mesh 857 ! This ensures that the fine mesh will not be any coarser than the coarse mesh in each dimension 858 ngfftc(:) = ngfft(1:3) 859 kpt=k0; if (response==1.and.usepaw==1) kpt=qphon ! this is temporary 860 861 call getng(dtsets(idtset)%bxctmindg,ecutdg_eff,gmet,kpt,me_fft,mgfftdg,& 862 & nfftdg,ngfftdg,nproc_fft,nsym,paral_fft,symrel,ngfftc,& 863 & use_gpu_cuda=dtsets(idtset)%use_gpu_cuda) 864 865 dtsets(idtset)%ngfftdg(:)=ngfftdg(:) 866 dtsets(idtset)%mgfftdg=mgfftdg 867 dtsets(idtset)%nfftdg=nfftdg 868 ! Compute fft distribution for fine grid 869 fftalg=ngfft(7); fftalga=fftalg/100; fftalgc=mod(fftalg,10) 870 call init_distribfft(mpi_enregs(idtset)%distribfft,'f', mpi_enregs(idtset)%nproc_fft,ngfftdg(2),ngfftdg(3)) 871 end if 872 873 dtsets(idtset)%mpw=mpw 874 ABI_DEALLOCATE(symrel) 875 ABI_DEALLOCATE(istwfk) 876 ABI_DEALLOCATE(kpt_with_shift) 877 ABI_DEALLOCATE(intarr) 878 ABI_DEALLOCATE(dprarr) 879 880 ! Initialize data for the parallelization over atomic sites (PAW) 881 if (dtsets(idtset)%natom==1) dtsets(idtset)%paral_atom=0 882 if (dtsets(idtset)%usepaw==0) dtsets(idtset)%paral_atom=0 883 if (dtsets(idtset)%usewvl/=0) dtsets(idtset)%paral_atom=0 884 if (dtsets(idtset)%usedmft==1) dtsets(idtset)%paral_atom=0 885 if (optdriver/=RUNL_GSTATE.and.optdriver/=RUNL_RESPFN.and.optdriver/=RUNL_GWLS) dtsets(idtset)%paral_atom=0 886 if (dtsets(idtset)%macro_uj/=0) dtsets(idtset)%paral_atom=0 887 888 call initmpi_atom(dtsets(idtset),mpi_enregs(idtset)) 889 890 ! In case of the use of a GPU (Cuda), some defaults can change 891 ! according to a threshold on matrix sizes 892 if (dtsets(idtset)%use_gpu_cuda==1.or.dtsets(idtset)%use_gpu_cuda==-1) then 893 if (optdriver==RUNL_GSTATE.or.optdriver==RUNL_GWLS) then 894 vectsize=dtsets(idtset)%mpw*dtsets(idtset)%nspinor/dtsets(idtset)%npspinor 895 if (all(dtsets(idtset)%istwfk(:)==2)) vectsize=2*vectsize 896 blocksize=dtsets(idtset)%npband*dtsets(idtset)%bandpp 897 if (dtsets(idtset)%paral_kgb==0) blocksize=dtsets(idtset)%npfft 898 if ((vectsize*blocksize**2)>=dtsets(idtset)%gpu_linalg_limit) then 899 if (.not.wfoptalg_read) then 900 dtsets(idtset)%wfoptalg=14 901 if (.not.fftalg_read) then 902 dtsets(idtset)%ngfft(7) = fftalg_for_npfft(dtsets(idtset)%npfft) 903 if (usepaw==1) dtsets(idtset)%ngfftdg(7) = fftalg_for_npfft(dtsets(idtset)%npfft) 904 end if 905 if (.not.ortalg_read) dtsets(idtset)%ortalg=-abs(dtsets(idtset)%ortalg) 906 end if 907 end if 908 end if 909 end if 910 911 ! initialize data for the parallelization for WVL: 912 if(dtsets(idtset)%usewvl==1) then 913 mpi_enregs(idtset)%comm_wvl=mpi_enregs(idtset)%comm_cell 914 mpi_enregs(idtset)%nproc_wvl=xmpi_comm_size(mpi_enregs(idtset)%comm_wvl) 915 mpi_enregs(idtset)%me_wvl=xmpi_comm_rank(mpi_enregs(idtset)%comm_wvl) 916 end if 917 918 end do 919 920 !This is not a very clean exit in case of paral_kgb<0 921 if (iexit/=0)then 922 MSG_ERROR_NODUMP("aborting now") 923 end if 924 925 DBG_EXIT("COLL") 926 927 end subroutine mpi_setup