ABINIT/mpi_setup [ Functions ]

NAME

 mpi_setup

FUNCTION

 Big loop on the datasets:
  - compute mgfft, mpw, nfft, ... for this dataset;
  - fill mpi_enreg.
  *** On output of this routine, all the dtsets input variables are known ***
 The content of dtsets should not be modified afterwards.

COPYRIGHT

 Copyright (C) 1999-2018 ABINIT group (FJ,MT)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt.

INPUTS

  filnam(5)=character strings giving file names
  lenstr=actual length of the string "string"
  ndtset=number of datasets to be read; if 0, no multi-dataset mode
  ndtset_alloc=number of datasets, corrected for allocation of at least
      one dataset
  string*(*)=character string containing all the input data, from which
      the parallelization-related variables are read here

OUTPUT

  dtsets(0:ndtset_alloc)=<type dataset_type>contains all input variables,
   some of which are initialized here, while others were already
   initialized previously.

SIDE EFFECTS

   mpi_enregs(0:ndtset_alloc)=information about MPI parallelization, filled here for each dataset

PARENTS

      abinit

CHILDREN

      abi_io_redirect,distrb2,distrb2_hf,finddistrproc,get_npert_rbz,getmpw
      getng,init_distribfft,init_mpi_enreg,initmpi_atom,initmpi_grid
      initmpi_img,initmpi_pert,intagm,libpaw_write_comm_set,metric,mkrdim
      wrtout
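
NOTES

  A minimal calling sketch (not part of the ABINIT sources; it only illustrates
  the interface documented above). It assumes that the caller has already
  allocated dtsets(0:ndtset_alloc) and mpi_enregs(0:ndtset_alloc) and has read
  the preprocessed input file into string(1:lenstr):

    call mpi_setup(dtsets,filnam,lenstr,mpi_enregs,ndtset,ndtset_alloc,string)
    ! On return, dtsets(1:ndtset_alloc) is fully initialized and must not be
    ! modified anymore; mpi_enregs(idtset) holds the MPI distribution chosen
    ! for dataset idtset.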

SOURCE

 45 #if defined HAVE_CONFIG_H
 46 #include "config.h"
 47 #endif
 48 
 49 #include "abi_common.h"
 50 
 51 subroutine mpi_setup(dtsets,filnam,lenstr,mpi_enregs,ndtset,ndtset_alloc,string)
 52 
 53  use defs_basis
 54  use defs_abitypes
 55  use defs_parameters
 56  use m_distribfft
 57  use m_xmpi
 58  use m_errors
 59  use m_profiling_abi
 60 
 61  use m_geometry,     only : metric
 62  use m_parser,       only : intagm
 63  use m_geometry,     only : mkrdim
 64  use m_fftcore,      only : fftalg_for_npfft
 65  use m_mpinfo,       only : init_mpi_enreg,mpi_distrib_is_ok
 66  use m_libpaw_tools, only : libpaw_write_comm_set
 67  use m_dtset,        only : get_npert_rbz
 68  use m_kg,           only : getmpw
 69 
 70 !This section has been created automatically by the script Abilint (TD).
 71 !Do not modify the following lines by hand.
 72 #undef ABI_FUNC
 73 #define ABI_FUNC 'mpi_setup'
 74  use interfaces_14_hidewrite
 75  use interfaces_32_util
 76  use interfaces_51_manage_mpi
 77  use interfaces_52_fft_mpi_noabirule
 78  use interfaces_57_iovars, except_this_one => mpi_setup
 79 !End of the abilint section
 80 
 81  implicit none
 82 
 83 !Arguments ------------------------------------
 84 !scalars
 85  integer,intent(in) :: lenstr,ndtset,ndtset_alloc
 86  type(MPI_type),intent(inout) :: mpi_enregs(0:ndtset_alloc)
 87  character(len=*),intent(inout) :: string
 88 !arrays
 89  character(len=fnlen),intent(in) :: filnam(5)
 90  type(dataset_type),intent(inout) :: dtsets(0:ndtset_alloc)
 91 
 92 !Local variables -------------------------------
 93 !scalars
 94  integer :: blocksize,exchn2n3d,iband,idtset,iexit,ii,iikpt,iikpt_modulo
 95  integer :: isppol,jdtset,marr,mband_lower,mband_upper
 96  integer :: me_fft,mgfft,mgfftdg,mkmem,mpw,mpw_k,optdriver
 97  integer :: nfft,nfftdg,nkpt,nkpt_me,npert,nproc,nproc_fft,nqpt
 98  integer :: nspink,nsppol,nsym,paral_fft,response,tnband,tread0,usepaw,vectsize
 99  integer :: fftalg,fftalga,fftalgc
100 #ifdef HAVE_LINALG_ELPA
101  integer :: icol,irow,np
102 #endif
103  logical :: fftalg_read,ortalg_read,wfoptalg_read,do_check
104  real(dp) :: dilatmx,ecut,ecut_eff,ecutdg_eff,ucvol
105  character(len=500) :: message
106 !arrays
107  integer :: ngfft(18),ngfftdg(18),ngfftc(3),tread(11)
108  integer,allocatable :: intarr(:),istwfk(:),symrel(:,:,:)
109  integer,pointer :: nkpt_rbz(:)
110  real(dp),parameter :: k0(3)=(/zero,zero,zero/)
111  real(dp) :: gmet(3,3),gprimd(3,3),kpt(3),qphon(3),rmet(3,3),rprimd(3,3)
112  real(dp),allocatable :: dprarr(:),kpt_with_shift(:,:)
113  real(dp),pointer :: nband_rbz(:,:)
114  character(len=6) :: nm_mkmem(3)
115 
116 !*************************************************************************
117 
118  DBG_ENTER("COLL")
119 
120  iexit=0;mpw_k=0
121 
122  call init_mpi_enreg(mpi_enregs(0))
123  call initmpi_img(dtsets(0),mpi_enregs(0),-1)
124 
125  do idtset=1,ndtset_alloc
126    call init_mpi_enreg(mpi_enregs(idtset))
127 
128    ! Handy read-only variables.
129    optdriver = dtsets(idtset)%optdriver
130 
131 !  Read parallel input parameters
132    marr=max(5,dtsets(idtset)%npsp,dtsets(idtset)%nimage)
133    ABI_ALLOCATE(intarr,(marr))
134    ABI_ALLOCATE(dprarr,(marr))
135    nkpt  =dtsets(idtset)%nkpt
136    nsppol=dtsets(idtset)%nsppol
137    jdtset=dtsets(idtset)%jdtset ; if(ndtset==0)jdtset=0
138    usepaw=dtsets(idtset)%usepaw
139    mband_upper=maxval(dtsets(idtset)%nband(1:nkpt*nsppol))
140    mband_lower=minval(dtsets(idtset)%nband(1:nkpt*nsppol))
141 
142 !  Compute metric for this dataset
143    call mkrdim(dtsets(idtset)%acell_orig(1:3,1),dtsets(idtset)%rprim_orig(1:3,1:3,1),rprimd)
144    call metric(gmet,gprimd,-1,rmet,rprimd,ucvol)
145 
146    ! Read paral_kgb and disable it if not supported in optdriver.
147    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'paral_kgb',tread(1),'INT')
148    if (tread(1)==1) dtsets(idtset)%paral_kgb=intarr(1)
149 
150    if(xmpi_paral==0.and.dtsets(idtset)%paral_kgb==1)then
151      dtsets(idtset)%paral_kgb=0
152      write(message, '(5a)' ) &
 153 &     'When ABINIT is compiled without the MPI flag,',ch10,&
 154 &     'setting paral_kgb/=0 is useless. paral_kgb has been reset to 0.',ch10,&
 155 &     'Action: modify the compilation options or paral_kgb in the input file.'
156      MSG_WARNING(message)
157    end if
158 
159    if ( ALL(optdriver /= [RUNL_GSTATE, RUNL_GWLS]) .and. dtsets(idtset)%paral_kgb/=0) then
160      dtsets(idtset)%paral_kgb=0
161      write(message, '(a,i0,a)') &
162 &     "paral_kgb != 0 is not available in optdriver ",optdriver,". Setting paral_kgb to 0"
163      MSG_COMMENT(message)
164    end if
165 
166    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'max_ncpus',tread0,'INT')
167    if (tread0==1) dtsets(idtset)%max_ncpus=intarr(1)
168 
169    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'paral_atom',tread0,'INT')
170    if(tread0==1) dtsets(idtset)%paral_atom=intarr(1)
171 
172    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'paral_rf',tread0,'INT')
173    if (tread0==1.and.optdriver==RUNL_RESPFN) dtsets(idtset)%paral_rf=intarr(1)
174 
175    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'npimage',tread(2),'INT')
176    if(tread(2)==1) dtsets(idtset)%npimage=intarr(1)
177 
178    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'nppert',tread(3),'INT')
179    if (tread(3)==1.and.optdriver==RUNL_RESPFN) dtsets(idtset)%nppert=intarr(1)
180 
181    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'npkpt',tread(4),'INT')
182    if(tread(4)==1) dtsets(idtset)%npkpt=intarr(1)
183 
184    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'npspinor',tread(5),'INT')
185    if(tread(5)==1) dtsets(idtset)%npspinor=intarr(1)
186 
187    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'npfft',tread(6),'INT')
188    if(tread(6)==1) dtsets(idtset)%npfft=intarr(1)
189 
190    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'npband',tread(7),'INT')
191    if(tread(7)==1) dtsets(idtset)%npband=intarr(1)
192 
193    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'bandpp',tread(8),'INT')
194    if(tread(8)==1) dtsets(idtset)%bandpp=intarr(1)
195 
196    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'use_slk',tread(9),'INT')
197    if(tread(9)==1) dtsets(idtset)%use_slk=intarr(1)
198 
199    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'np_slk',tread(10),'INT')
200    if(tread(10)==1) dtsets(idtset)%np_slk=intarr(1)
201 
202    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'pw_unbal_thresh',tread0,'DPR')
203    if(tread0==1) dtsets(idtset)%pw_unbal_thresh=dprarr(1)
204    mpi_enregs(idtset)%pw_unbal_thresh=dtsets(idtset)%pw_unbal_thresh
205 
206    call intagm(dprarr,intarr,jdtset,marr,5,string(1:lenstr),'gpu_devices',tread0,'INT')
207    if(tread0==1) dtsets(idtset)%gpu_devices(1:5)=intarr(1:5)
208 
209    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'gpu_linalg_limit',tread(11),'INT')
210    if(tread(11)==1) dtsets(idtset)%gpu_linalg_limit=intarr(1)
211 
212 
213    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'nphf',tread0,'INT')
214    if(tread0==1) dtsets(idtset)%nphf=intarr(1)
215 
216    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'autoparal',tread0,'INT')
217    if(tread0==1) dtsets(idtset)%autoparal=intarr(1)
218 
219    ! Dump the list of irreducible perturbations and exit.
220    if (dtsets(idtset)%paral_rf==-1) then
221      call get_npert_rbz(dtsets(idtset),nband_rbz,nkpt_rbz,npert)
222      ABI_DEALLOCATE(nband_rbz)
223      ABI_DEALLOCATE(nkpt_rbz)
224      iexit = iexit + 1
225    end if
226 
227 !  From total number of procs, compute all possible distributions
228 !  Ignore exit flag if GW/EPH calculations because autoparal section is performed in screening/sigma/bethe_salpeter/eph
229    call finddistrproc(dtsets,filnam,idtset,iexit,mband_upper,mpi_enregs(idtset),ndtset_alloc,tread)
230    if (any(optdriver == [RUNL_SCREENING, RUNL_SIGMA, RUNL_BSE, RUNL_EPH])) iexit = 0
231 
232    if ((optdriver/=RUNL_GSTATE.and.optdriver/=RUNL_GWLS).and. &
233 &   (dtsets(idtset)%npkpt/=1   .or.dtsets(idtset)%npband/=1.or.dtsets(idtset)%npfft/=1.or. &
234 &   dtsets(idtset)%npspinor/=1.or.dtsets(idtset)%bandpp/=1)) then
235 !&   .or.(dtsets(idtset)%iscf<0)) then
236      dtsets(idtset)%npkpt=1 ; dtsets(idtset)%npspinor=1 ; dtsets(idtset)%npfft=1
237      dtsets(idtset)%npband=1; dtsets(idtset)%bandpp=1  ; dtsets(idtset)%nphf=1
238      dtsets(idtset)%paral_kgb=0
 239      message = 'For non ground-state calculations, bandpp, npfft, npband, npspinor, npkpt and nphf have been reset to 1'
240      MSG_WARNING(message)
241    end if
242 
243 !  Read again some input data to take into account a possible change of paral_kgb
244    wfoptalg_read=.false.
245    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'wfoptalg',tread0,'INT')
246    if(tread0==1) then
247      dtsets(idtset)%wfoptalg=intarr(1)
248      wfoptalg_read=.true.
249    else
250      if (dtsets(idtset)%usepaw==0) dtsets(idtset)%wfoptalg=0
251      if (dtsets(idtset)%usepaw/=0) dtsets(idtset)%wfoptalg=10
252      if ((optdriver==RUNL_GSTATE.or.optdriver==RUNL_GWLS).and.dtsets(idtset)%paral_kgb/=0) dtsets(idtset)%wfoptalg=114
253      if (mod(dtsets(idtset)%wfoptalg,10)==4) then
254        do iikpt=1,dtsets(idtset)%nkpt
255          if (any(abs(dtsets(idtset)%kpt(:,iikpt))>tol8)) dtsets(idtset)%istwfk(iikpt)=1
256        end do
257      end if
258    end if
259 
260    dtsets(idtset)%densfor_pred=2
261    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'densfor_pred',tread0,'INT')
262    if(tread0==1) then
263      dtsets(idtset)%densfor_pred=intarr(1)
264    else
265      if (dtsets(idtset)%paral_kgb==1) dtsets(idtset)%densfor_pred=6
266    end if
267    if((dtsets(idtset)%iscf==5.or.dtsets(idtset)%iscf==6) &
268 &   .and. dtsets(idtset)%ionmov==4 .and. dtsets(idtset)%densfor_pred/=3 )then
269      dtsets(idtset)%densfor_pred=3
270      write(message, '(a,a,a)' )&
271 &     'When ionmov==4 and iscf==5 or 6, densfor_pred must be 3.',ch10,&
272 &     'Set densfor_pred to 3.'
273      MSG_COMMENT(message)
274    end if
275 
276 #ifdef HAVE_LOTF
 277 !  LOTF needs densfor_pred=2
278    if(dtsets(idtset)%ionmov==23) dtsets(idtset)%densfor_pred=2
279 #endif
280 
281    if (usepaw==0) then
282      dtsets(idtset)%ortalg=2
283    else
284      dtsets(idtset)%ortalg=-2
285    end if
286    ortalg_read=.false.
287    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'ortalg',tread0,'INT')
288    if(tread0==1) then
289      dtsets(idtset)%ortalg=intarr(1)
290      ortalg_read=.true.
291    else if (dtsets(idtset)%wfoptalg>=10 .and. dtsets(idtset)%ortalg>0) then
292      dtsets(idtset)%ortalg=-dtsets(idtset)%ortalg
293    end if
294 
295    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'iomode',tread0,'INT')
296    if(tread0==1) then
297      dtsets(idtset)%iomode=intarr(1)
298    else
299      if ((xmpi_mpiio==1).and.(dtsets(idtset)%paral_kgb==1)) dtsets(idtset)%iomode=IO_MODE_MPI
300 #ifdef HAVE_NETCDF_DEFAULT
301      dtsets(idtset)%iomode=IO_MODE_ETSF
302 #endif
303    end if
304 
305    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'pawmixdg',tread0,'INT')
306    if(tread0==1) then
307      dtsets(idtset)%pawmixdg=intarr(1)
308    else if (dtsets(idtset)%npfft>1.and.usepaw==1) then
309      dtsets(idtset)%pawmixdg=1
310    end if
311 
312    call initmpi_img(dtsets(idtset),mpi_enregs(idtset),-1)
313    nproc=mpi_enregs(idtset)%nproc_cell
314 
315 !  Cycle if the processor is not used
316    if (mpi_enregs(idtset)%me<0) then
317      ABI_DEALLOCATE(intarr)
318      ABI_DEALLOCATE(dprarr)
319      cycle
320    end if
321 
322    response=0
323    if (dtsets(idtset)%rfddk/=0 .or. dtsets(idtset)%rf2_dkdk/=0 .or. dtsets(idtset)%rf2_dkde/=0 .or. &
324 &   dtsets(idtset)%rfelfd/=0 .or. dtsets(idtset)%rfphon/=0 .or. dtsets(idtset)%rfstrs/=0 .or. &
325 &   dtsets(idtset)%rfuser/=0 .or. dtsets(idtset)%rfmagn/=0) response=1
326 
327 !  If no MPI, set all npxxx variables to 1
328    if (nproc==1) then
329      dtsets(idtset)%npkpt    = 1 ; dtsets(idtset)%npband   = 1
330      dtsets(idtset)%npfft    = 1 ; dtsets(idtset)%npspinor = 1
331      dtsets(idtset)%nphf     = 1
332    end if
333 
 334 !  --If CUDA and recursion are used: only band parallelisation
335    if(dtsets(idtset)%tfkinfunc==2 .and. nproc/=1)then
336      dtsets(idtset)%npband = dtsets(idtset)%npband*dtsets(idtset)%npkpt*dtsets(idtset)%npspinor*dtsets(idtset)%npfft
337      dtsets(idtset)%npkpt = 1
338      dtsets(idtset)%npfft = 1
339      dtsets(idtset)%npspinor = 1
340      write(message, '(5a,i6,a)' )&
 341 &     'If HAVE_GPU_CUDA and recursion are used,',ch10,&
 342 &     'only the band parallelisation is active. We set:',ch10,&
343 &     'npfft= 1, npkpt= 1, npband=',dtsets(idtset)%npband,' .'
344      MSG_WARNING(message)
345    end if
346 
347    if (dtsets(idtset)%npspinor>=2.and.dtsets(idtset)%nspinor==1) then
348      dtsets(idtset)%npspinor=1
349      dtsets(idtset)%npfft=2*dtsets(idtset)%npfft
350      write(message,'(3a)')&
351 &     'npspinor is bigger than nspinor !',ch10,&
352 &     'We set npspinor to 1 ; we set npfft to 2*npfft'
353      MSG_WARNING(message)
354    end if
355 
356 !  Some checks on parallelization data
357    if(dtsets(idtset)%paral_kgb < 0 ) then
358      cycle
359    else if(dtsets(idtset)%paral_kgb/=0.and.(dtsets(idtset)%bandpp/=1.or.dtsets(idtset)%npband/=1.or.&
360 &     dtsets(idtset)%npfft/=1.or.dtsets(idtset)%npkpt/=1.or.dtsets(idtset)%npspinor/=1))then
361      if(dtsets(idtset)%npkpt*dtsets(idtset)%npfft*dtsets(idtset)%npband*dtsets(idtset)%npspinor > nproc )then
362        write(message,'(7a)')&
363 &       'The product of npkpt, npfft, npband and npspinor is bigger than the number of processors.',ch10,&
364 &       'The user-defined values of npkpt, npfft, npband or npspinor will be modified,',ch10,&
365 &       'in order to bring this product below nproc .',ch10,&
366 &       'At present, only a very simple algorithm is used ...'
367        MSG_WARNING(message)
368 
369        if(dtsets(idtset)%npkpt*dtsets(idtset)%npband*dtsets(idtset)%npspinor <= nproc) then
370          dtsets(idtset)%npfft=1
371          MSG_WARNING('Set npfft to 1')
372        else if(dtsets(idtset)%npkpt*dtsets(idtset)%npspinor <= nproc)then
373          dtsets(idtset)%npfft=1
374          dtsets(idtset)%npband=1
375          MSG_WARNING('Set npfft and npband to 1')
376        else if(dtsets(idtset)%npkpt <= nproc)then
377          dtsets(idtset)%npfft=1
378          dtsets(idtset)%npband=1
379          dtsets(idtset)%npspinor=1
 380          MSG_WARNING('Set npfft, npband and npspinor to 1')
381        else
382          dtsets(idtset)%npfft=1
383          dtsets(idtset)%npband=1
384          dtsets(idtset)%npkpt=1
385          dtsets(idtset)%npspinor=1
 386          MSG_WARNING('Set npfft, npband, npspinor and npkpt to 1')
387        end if
388      else if(dtsets(idtset)%npkpt*dtsets(idtset)%npfft*dtsets(idtset)%npband*dtsets(idtset)%npspinor < nproc)then
389        write(message,'(2a)')&
 390 &       'The number of processors must not be greater than npfft*npband*npkpt*npspinor ',&
 391 &       'when npfft, npkpt, npband or npspinor are chosen manually in the input file.'
392        MSG_ERROR(message)
393      end if
394    end if
395 
396 !  LOBPCG and ChebFi need paral_kgb=1 in parallel
397    if ((dtsets(idtset)%npband*dtsets(idtset)%npfft>1).and. &
398 &   (mod(dtsets(idtset)%wfoptalg,10)==1.or.mod(dtsets(idtset)%wfoptalg,10)==4)) then
399      dtsets(idtset)%paral_kgb=1
400    end if
401 
402 !  Check size of Scalapack communicator
403 #ifdef HAVE_LINALG_ELPA
404    if(dtsets(idtset)%paral_kgb>0.and.dtsets(idtset)%np_slk>0) then
405      np=min(dtsets(idtset)%np_slk,dtsets(idtset)%npband*dtsets(idtset)%npfft*dtsets(idtset)%npspinor)
406      irow=int(sqrt(float(np)))
407      do while(mod(np,irow)/=0)
408        irow=irow-1
409      end do
410      icol=nproc/irow
411      if (icol>mband_lower) then
412        do while(icol>mband_lower)
413          icol=icol-1
414          do while(mod(np,icol)/=0)
415            icol=icol-1
416          end do
417        end do
418        dtsets(idtset)%np_slk=icol
419        write(message,'(5a,i6,a)')&
420 &       'The number of band*fft*spinor processors was not consistent with',ch10,&
 421 &       'the size of the communicator used for the ELPA library (np_slk).',ch10,&
 422 &       'The value of np_slk has been adjusted to ',dtsets(idtset)%np_slk,'.'
423        MSG_COMMENT(message)
424      end if
425    end if
426 #endif
427 
428    !Additional check in case of a parallelized Hartree-Fock calculation
429    !   %usefock == option to perform Fock exchange calculation
430    !   %nphf   == number of processors for Fock exchange calculation
431    if ((dtsets(idtset)%usefock==1).and.(dtsets(idtset)%nphf/=1)) then
432 
433      if ((dtsets(idtset)%nphf<0).or.(dtsets(idtset)%nphf==0)) then
 434        MSG_ERROR('The value of the variable nphf must be a strictly positive integer.')
435      end if
436      if (dtsets(idtset)%paral_kgb/=0) then
437        message = 'Option paral_kgb should be turned off (value 0) for a parallelized Hartree-Fock calculation.'
438        MSG_ERROR(message)
439      end if
440      if (response/=0) then
441        message = 'A response function calculation is not yet possible with a parallelized Hartree-Fock calculation.'
442        MSG_ERROR(message)
443      end if
444      if (dtsets(idtset)%npspinor>1) then
445        message = 'The parallelism on spinors is not supported by a parallelized Hartree-Fock calculation.'
446        MSG_ERROR(message)
447      end if
448      if (dtsets(idtset)%npkpt*dtsets(idtset)%nphf > nproc )then
449        write(message,'(a,3(a,i0))') ch10,&
 450 &       'The product of variables npkpt and nphf is bigger than the number of processors: npkpt= ',&
451 &       dtsets(idtset)%npkpt,' nphf= ',dtsets(idtset)%nphf  ,' and nproc= ', nproc
452        MSG_ERROR(message)
453      end if
454    end if ! Fock
455 
456    !When using chebfi, the number of blocks is equal to the number of processors
457    if((dtsets(idtset)%wfoptalg == 1)) then
 458      !nband might have different values for different k-points, but not bandpp.
 459      !In this case, we just use the largest nband, and the input will probably fail
 460      !at the bandpp check later on.
461      dtsets(idtset)%bandpp = mband_upper / dtsets(idtset)%npband
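     ! Hypothetical illustration: with mband_upper=64 and npband=8, this integer
     ! division sets bandpp to 8; if mband_upper is not a multiple of npband,
     ! the bandpp consistency check mentioned above is expected to fail.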
462    end if
463 
464 !  Set mpi_enreg
465    mpi_enregs(idtset)%paral_kgb=dtsets(idtset)%paral_kgb
466    if(dtsets(idtset)%paral_kgb/=0)then
467      mpi_enregs(idtset)%nproc_kpt=dtsets(idtset)%npkpt
468      mpi_enregs(idtset)%nproc_fft=dtsets(idtset)%npfft
469      mpi_enregs(idtset)%nproc_band=dtsets(idtset)%npband
470      mpi_enregs(idtset)%nproc_spinor=min(dtsets(idtset)%npspinor,dtsets(idtset)%nspinor)
471      mpi_enregs(idtset)%bandpp=dtsets(idtset)%bandpp
472 !    Additional setting in case of hybrid functional calculation => not yet tested (CMartins)
473 !     if (dtsets(idtset)%usefock==1) then
474 !       mpi_enregs(idtset)%nproc_hf = dtsets(idtset)%nphf
475 !       if (dtsets(idtset)%nphf>1) mpi_enregs(idtset)%paral_hf=1
476 !     end if
477    else
478      mpi_enregs(idtset)%bandpp = dtsets(idtset)%bandpp
 479 !    Additional setting in case of a Fock exchange or PBE0 calculation
480      if (dtsets(idtset)%usefock==1) then
481        if (dtsets(idtset)%nphf>1) mpi_enregs(idtset)%paral_hf=1
482        mpi_enregs(idtset)%nproc_hf = dtsets(idtset)%nphf
483        if (dtsets(idtset)%npkpt/=1) then
484          mpi_enregs(idtset)%nproc_kpt = dtsets(idtset)%npkpt
485        else
486          mpi_enregs(idtset)%nproc_kpt = mpi_enregs(idtset)%nproc_cell/mpi_enregs(idtset)%nproc_hf
487        end if
488      else
489        mpi_enregs(idtset)%nproc_kpt = mpi_enregs(idtset)%nproc_cell
490      end if
491    end if
492 
493    if(dtsets(idtset)%paral_kgb>=0) then
494 
495 !    Compute processor distribution over perturbations
496      mpi_enregs(idtset)%paral_pert=dtsets(idtset)%paral_rf
497      if (mpi_enregs(idtset)%paral_pert==1) then
498        dtsets(idtset)%nppert=max(1,dtsets(idtset)%nppert)
499        if(dtsets(idtset)%nppert>mpi_enregs(idtset)%nproc) then
500          message=' The number of processors must not be smaller than nppert !'
501          MSG_ERROR(message)
502        end if
503        call initmpi_pert(dtsets(idtset),mpi_enregs(idtset))
504        mpi_enregs(idtset)%nproc_kpt = mpi_enregs(idtset)%nproc_cell
505        nproc=mpi_enregs(idtset)%nproc_cell
506      end if
507 !    Cycle if the processor is not used
508      if (mpi_enregs(idtset)%me<0) then
509        ABI_DEALLOCATE(intarr)
510        ABI_DEALLOCATE(dprarr)
511        cycle
512      end if
513 
 514 !    Compute processor distribution over kpt (and possibly band-FFT)
515      call initmpi_grid(mpi_enregs(idtset))
516      if(dtsets(idtset)%usewvl==1) mpi_enregs(idtset)%comm_fft=mpi_enregs(idtset)%comm_cell
517 
 518 !    Initialize tables used for k/spin parallelism (with sequential-type values)
519      ABI_ALLOCATE(mpi_enregs(idtset)%proc_distrb,(nkpt,mband_upper,nsppol))
520      ABI_ALLOCATE(mpi_enregs(idtset)%my_kpttab,(nkpt))
521      mpi_enregs(idtset)%proc_distrb(:,:,:)=0
522      mpi_enregs(idtset)%my_kpttab(:)=(/(ii,ii=1,nkpt)/)
523      mpi_enregs(idtset)%my_isppoltab(:)=1;if (dtsets(idtset)%nsppol==1) mpi_enregs(idtset)%my_isppoltab(2)=0
524 
525 !    HF or hybrid calculation : initialization of the array distrb_hf
526      if (dtsets(idtset)%usefock==1) then
527        ABI_ALLOCATE(mpi_enregs(idtset)%distrb_hf,(dtsets(idtset)%nkpthf,dtsets(idtset)%nbandhf,1))
 528 !      The dimensions of distrb_hf are given by %nkpthf and %nbandhf.
 529 !      We assume that there is no spin-polarization dependence for the occupied states.
530        mpi_enregs(idtset)%distrb_hf=0
531      end if
532 
 533 !    Define the k-point distribution (determine who I am)
 534 !    Note that nkpt_me may differ from processor to processor
 535 !    This fact will NOT be taken into account when
 536 !    the memory needs are evaluated in the subroutine memory.
537 !    Also, the reduction of k points due to symmetry in RF calculations
538 !    is NOT taken into account. This should be changed later ...
539      nkpt_me=nkpt
540      if(xmpi_paral==1 .and. dtsets(idtset)%usewvl == 0) then
541        nkpt_me=0
542        if(response==0 .or. (response==1 .and. dtsets(idtset)%efmas==1))then
543          mpi_enregs(idtset)%paralbd=0
544          call distrb2(mband_upper,dtsets(idtset)%nband,nkpt,nproc,nsppol,mpi_enregs(idtset))
545          do iikpt=1,nkpt
546            if(.not.(proc_distrb_cycle(mpi_enregs(idtset)%proc_distrb,iikpt,1,1,-1,mpi_enregs(idtset)%me_kpt)))&
547 &           nkpt_me=nkpt_me+1
548          end do ! ikpt=1,nkpt
549 !        HF or hybrid calculation : define the occupied states distribution (in array distrb_hf)
550          if (dtsets(idtset)%usefock==1) then
551            call distrb2_hf(dtsets(idtset)%nbandhf,dtsets(idtset)%nkpthf,nproc,nsppol,mpi_enregs(idtset))
552          end if
553        else ! response==1
 554 !        This wrongly assumes that the k-point sets of the two spin
 555 !        polarizations have the same number of elements (the maximum
 556 !        of the two) ...
 557 !        This is to be corrected when RF is implemented
 558 !        for the spin-polarized case.
559          mpi_enregs(idtset)%paralbd=1
560 !        nproc=mpi_enregs(idtset)%nproc_cell*mpi_enregs(idtset)%nproc_pert
561          call distrb2(mband_upper,dtsets(idtset)%nband,nkpt,nproc,nsppol,mpi_enregs(idtset))
562          do isppol=1,nsppol
563            nspink=0
564            do iikpt=1,nkpt
565              do iband=1,dtsets(idtset)%nband(iikpt+(isppol-1)*nkpt)
566                if(mpi_enregs(idtset)%proc_distrb(iikpt,iband,isppol)==mpi_enregs(idtset)%me_cell)then
567                  nspink=nspink+1
568                  exit
569                end if
570              end do ! iband
571            end do ! iikpt
572            if(nspink>nkpt_me)nkpt_me=nspink
573          end do ! isppol
574 !        Is nband present in input file or automatically estimated ?
575          tnband=0
576          call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'nband',tnband,'INT')
577 !        If the number of bands was estimated, there might be a side effect
578 !        when the definitive number of bands is known. k points
579 !        might be attributed to different processors than the present
580 !        proc_distrb describes. At most, the number of k points could increase by 1 ...
581          if(tnband==0)nkpt_me=nkpt_me+1
582 !        In any case, the maximal number of k points is nkpt
583          if(nkpt_me>nkpt)nkpt_me=nkpt
584        end if
585      end if
586    end if
587 
588 !  Take care of mkmems. Use the generic name -mkmem- for mkmem as well as mkqmem
589 !  and mk1mem.
590    nm_mkmem(1)='mkmem '
591    nm_mkmem(2)='mkqmem'
592    nm_mkmem(3)='mk1mem'
593 
594    do ii=1,3
595 
596 !    Read in mkmem here if it is in the input file
597      if(ii==1)then
598        call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'mkmem',tread0,'INT')
599      else if(ii==2)then
600        call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'mkqmem',tread0,'INT')
601      else if(ii==3)then
602        call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'mk1mem',tread0,'INT')
603      end if
604 
605 !    Note that mkmem is used as a dummy variable, representing mkmem as well
606 !    as mkqmem, and mk1mem.
607      if(tread0==1) then
608        mkmem=intarr(1)
609        if (mkmem<0) then
610 !        mkmem is unreasonable; must be zero or positive
611          write(message, '(4a,i0,4a)')&
612 &         nm_mkmem(ii),' must be positive or null but ',nm_mkmem(ii),' =',mkmem,ch10,&
613 &         'Use default ',nm_mkmem(ii),' = nkpt .'
614          MSG_WARNING(message)
615          mkmem=nkpt
616        end if
617 
618      else
619 
620 !      mkmem was not set in the input file so default to incore solution
621        write(message,'(6a)') &
 622 &       'mpi_setup: ',nm_mkmem(ii),' undefined in the input file. ','Use default ',nm_mkmem(ii),' = nkpt'
623        call wrtout(std_out,message,'COLL')
624        mkmem=nkpt
625      end if
626 
627 !    Check whether nkpt distributed on the processors <= mkmem;
628 !    if so then may run entirely in core,
629 !    avoiding i/o to disk for wavefunctions and kg data.
630 !    mkmem/=0 to avoid i/o; mkmem==0 to use disk i/o for nkpt>=1.
631      if (nkpt_me<=mkmem .and. mkmem/=0 ) then
632        write(message, '(a,i0,a,a,a,i0,a)' ) &
633 &       ' mpi_setup: With nkpt_me=',nkpt_me,' and ',nm_mkmem(ii),' = ',mkmem,', ground state wf handled in core.'
634        call wrtout(std_out,message,'COLL')
635        if(nkpt_me<mkmem .and. nkpt_me/=0)then
636          write(message,'(3a)')' Resetting ',nm_mkmem(ii),' to nkpt_me to save memory space.'
637          mkmem=nkpt_me
638          call wrtout(std_out,message,'COLL')
639        end if
640      else if(mkmem/=0)then
641        write(message, '(a,i0,3a,i0,5a)' ) &
 642 &       ' mpi_setup: With nkpt_me=',nkpt_me,' and ',nm_mkmem(ii),' = ',mkmem,&
 643 &       ' ground state wfs require disk i/o.',ch10,&
644 &       ' Resetting ',nm_mkmem(ii),' to zero to save memory space.'
645        mkmem=0
646        call wrtout(std_out,message,'COLL')
647      end if
648      if(dtsets(idtset)%usewvl == 0 .or. dtsets(idtset)%usepaw==1)then
649        if(ii==1)dtsets(idtset)%mkmem=mkmem
650      end if
651      if(ii==2)dtsets(idtset)%mkqmem=mkmem
652      if(ii==3)dtsets(idtset)%mk1mem=mkmem
653 
654      if(dtsets(idtset)%usewvl == 1 .and. dtsets(idtset)%usepaw==1 )then
655        if(dtsets(idtset)%mkmem .ne. dtsets(idtset)%nkpt) then
656          MSG_ERROR("mkmem is not allowed for WVL+PAW")
657        end if
658      end if
659 
 660    end do  ! End the loop on the three possibilities mkmem, mkqmem, mk1mem.
661 
662    if(dtsets(idtset)%paral_kgb==1) mpi_enregs(idtset)%paralbd=0
663 
 664 !  Check if some MPI processes are empty (the MBPT code uses a completely different MPI algorithm)
665    do_check = all(optdriver /= [RUNL_SCREENING, RUNL_SIGMA, RUNL_BSE])
666    if (dtsets(idtset)%usewvl == 0 .and. do_check) then
667      if (.not.mpi_distrib_is_ok(mpi_enregs(idtset),mband_upper,&
668 &     dtsets(idtset)%nkpt,dtsets(idtset)%mkmem,nsppol,msg=message)) then
669        write(message,'(5a)') trim(message),ch10,&
 670 &       'YOU ARE STRONGLY ADVISED TO ACTIVATE AUTOMATIC PARALLELIZATION!',ch10,&
671 &       'PUT "AUTOPARAL=1" IN THE INPUT FILE.'
672        MSG_WARNING(message)
673      end if
674    end if
675 
676 !  call mpi_setup1(dtsets(idtset),jdtset,lenstr,mband_upper,mpi_enregs(idtset),string)
677 !  Printing of processor distribution
678 !  MPIWF : here, set up the complete ngfft, containing the information
679 !  for the parallelisation of the FFT
680    call abi_io_redirect(new_io_comm=mpi_enregs(idtset)%comm_world)
681    call libpaw_write_comm_set(mpi_enregs(idtset)%comm_world)
682 
683 !  Default values for sequential case
684    paral_fft=0; nproc_fft=1; me_fft=0
685 
686    if(dtsets(idtset)%usewvl == 0)then
687      if(optdriver==RUNL_GSTATE.or.optdriver==RUNL_GWLS) then
688        paral_fft=1           ! parallelisation over FFT
689        if (mpi_enregs(idtset)%nproc_cell>0) then
690          if(mpi_enregs(idtset)%paral_kgb == 1) then
691 
692            if((dtsets(idtset)%use_gpu_cuda==1).and.(mpi_enregs(idtset)%nproc_fft/=1))then
693              write(message,'(3a,i0)') &
694 &             'When use_gpu_cuda is on, the number of FFT processors, npfft, must be 1',ch10,&
695 &             'However, npfft=',mpi_enregs(idtset)%nproc_fft
696              MSG_ERROR(message)
697            end if
698 
699            if(modulo(dtsets(idtset)%ngfft(2),mpi_enregs(idtset)%nproc_fft)/=0)then
700              write(message,'(3a,i0,a,i0)') &
701 &             'The number of FFT processors, npfft, should be a multiple of ngfft(2).',ch10,&
702 &             'However, npfft=',mpi_enregs(idtset)%nproc_fft,' and ngfft(2)=',dtsets(idtset)%ngfft(2)
703              MSG_BUG(message)
704            end if
705 
706            do iikpt=1,nkpt*nsppol
707              iikpt_modulo = modulo(iikpt,nkpt)+1
708              if ((dtsets(idtset)%istwfk(iikpt_modulo)==2)) then !.and.(dtsets(idtset)%ngfft(7)==401)) then
709                if ((mpi_enregs(idtset)%bandpp==0).or. &
710                ((mpi_enregs(idtset)%bandpp/=1).and.(modulo(mpi_enregs(idtset)%bandpp,2)/=0))) then
711                  write(message,'(3a,i0)') &
 712 &                 'The value of bandpp should be 1 or a multiple of 2.',ch10,&
713 &                 'However, bandpp=',mpi_enregs(idtset)%bandpp
714                  MSG_BUG(message)
715                end if
716                if(modulo(dtsets(idtset)%nband(iikpt),mpi_enregs(idtset)%nproc_band*mpi_enregs(idtset)%bandpp)/=0)then
717                  write(message,'(3a,i0,a,i0)') &
718 &                 'The number of bands for the k-point, nband_k, should be a multiple of nproc_band*bandpp.',ch10,&
719 &                 'However, nband_k=',dtsets(idtset)%nband(iikpt),' and nproc_band*bandpp=', &
720 &                 mpi_enregs(idtset)%nproc_band* mpi_enregs(idtset)%bandpp
721                  MSG_BUG(message)
722                end if
723              else if ((dtsets(idtset)%istwfk(iikpt_modulo)==2) .and. (dtsets(idtset)%ngfft(7)==400)) then
724                MSG_BUG('The fftalg=400 with istwfk=2 is not valid')
725              else
726                if(modulo(dtsets(idtset)%nband(iikpt),mpi_enregs(idtset)%nproc_band*mpi_enregs(idtset)%bandpp)/=0)then
727                  write(message,'(3a,i0,a,i0)') &
 728 &                 'The number of bands for the k-point, nband_k, should be a multiple of nproc_band*bandpp.',ch10,&
729 &                 'However, nband_k=',dtsets(idtset)%nband(iikpt),' and nproc_band*bandpp=', &
730 &                 mpi_enregs(idtset)%nproc_band* mpi_enregs(idtset)%bandpp
731                  MSG_BUG(message)
732                end if
733                if ((mpi_enregs(idtset)%bandpp==0)) then
734                  write(message,'(a,i0,2a,i0,2a,i0)')&
 735 &                 'The value of bandpp should not be 0 with fftalg=',dtsets(idtset)%ngfft(7),ch10,&
736 &                 'and istwfk=',dtsets(idtset)%istwfk(iikpt_modulo),ch10,&
737 &                 'However, bandpp=',mpi_enregs(idtset)%bandpp
738                  MSG_BUG(message)
739                end if
740              end if
741            end do
742 
743            if (xmpi_paral==1) then
744              if(modulo(nkpt*nsppol,mpi_enregs(idtset)%nproc_kpt)/=0)then
745                write(message,'(3a,i0,a,i0)') &
 746 &               'nkpt*nsppol should be a multiple of the number of KPT processors, npkpt.',ch10,&
747 &               'However, npkpt=',mpi_enregs(idtset)%nproc_kpt,' and nkpt*nsppol=',nkpt*nsppol
748                MSG_WARNING(message)
749              end if
750            end if
751          else
752            do iikpt=1,nkpt*nsppol
753              iikpt_modulo = modulo(iikpt,nkpt)+1
754              if(modulo(dtsets(idtset)%nband(iikpt),mpi_enregs(idtset)%nproc_band*mpi_enregs(idtset)%bandpp)/=0)then
755                write(message,'(3a,i0,a,i0)') &
 756 &               'The number of bands for the k-point, nband_k, should be a multiple of npband*bandpp.',ch10,&
757 &               'However, nband_k=',dtsets(idtset)%nband(iikpt),' and npband*bandpp=', &
758 &               mpi_enregs(idtset)%nproc_band* mpi_enregs(idtset)%bandpp
759                MSG_BUG(message)
760              end if
761            end do
762          end if
763        end if
764        nproc_fft=mpi_enregs(idtset)%nproc_fft
765        me_fft=mpi_enregs(idtset)%me_fft
766      end if
767    end if
768 
 769 !  Compute mgfft, mpw, nfft for this dataset (they depend on mpi_enreg)
770    ABI_ALLOCATE(istwfk,(nkpt))
771    ABI_ALLOCATE(kpt_with_shift,(3,nkpt))
772 
773    ! Set the default value of fftalg for given npfft but allow the user to override it.
774    ! Warning: If you need to change npfft, **DO IT** before this point so that here we get the correct fftalg
775    dtsets(idtset)%ngfft(7) = fftalg_for_npfft(dtsets(idtset)%npfft)
776    dtsets(idtset)%ngfftdg(7) = fftalg_for_npfft(dtsets(idtset)%npfft)
777 
778    fftalg_read=.false.
779    call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'fftalg',tread0,'INT')
780 
781    if (tread0==1) then
782      dtsets(idtset)%ngfft(7)=intarr(1)
783      if (usepaw==1) dtsets(idtset)%ngfftdg(7)=intarr(1)
784      fftalg_read=.true.
785    end if
786 
787    ecut     =dtsets(idtset)%ecut
788    dilatmx  =dtsets(idtset)%dilatmx
789    ngfft(:) =dtsets(idtset)%ngfft(:)
790    istwfk(:)=dtsets(idtset)%istwfk(1:nkpt)
791    nsym     =dtsets(idtset)%nsym
792 
793    nqpt=dtsets(idtset)%nqpt
794    qphon(:)=zero;if(nqpt/=0) qphon(:)=dtsets(idtset)%qptn(:)
795 
796    ABI_ALLOCATE(symrel,(3,3,nsym))
797    symrel(:,:,1:nsym)=dtsets(idtset)%symrel(:,:,1:nsym)
798    ecut_eff=ecut*dilatmx**2
799 
800    if (usepaw==1) then
801      call wrtout(std_out,'getng is called for the coarse grid:','COLL')
802    end if
803    kpt=k0; if (response==1.and.usepaw==1) kpt=qphon ! this is temporary
804 
805    call getng(dtsets(idtset)%boxcutmin,ecut_eff,gmet,kpt,me_fft,mgfft,nfft,&
806 &   ngfft,nproc_fft,nsym,paral_fft,symrel,&
807 &   use_gpu_cuda=dtsets(idtset)%use_gpu_cuda)
808 
809    dtsets(idtset)%ngfft(:)=ngfft(:)
810    dtsets(idtset)%mgfft=mgfft
811    dtsets(idtset)%nfft=nfft
812    kpt_with_shift(:,:)=dtsets(idtset)%kpt(:,1:nkpt)/dtsets(idtset)%kptnrm
813 
814    exchn2n3d=dtsets(idtset)%exchn2n3d
815    nproc_fft=ngfft(10) ; me_fft=ngfft(11)
816    fftalg=ngfft(7); fftalga=fftalg/100; fftalgc=mod(fftalg,10)
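    ! For instance (illustrative value only), fftalg=401 decomposes into
    ! fftalga=4 and fftalgc=1.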
817 
818    ! Initialize tables for MPI-FFT.
819    call init_distribfft(mpi_enregs(idtset)%distribfft,'c',mpi_enregs(idtset)%nproc_fft,ngfft(2),ngfft(3))
820 
821    if(response/=0)then
822 !    This value of mpw is used in the first part of respfn.f
823      call getmpw(ecut_eff,exchn2n3d,gmet,istwfk,kpt_with_shift,mpi_enregs(idtset),mpw_k,nkpt)
824    end if
825    if(nqpt/=0)then
826      kpt_with_shift(1,:)=kpt_with_shift(1,:)+qphon(1)
827      kpt_with_shift(2,:)=kpt_with_shift(2,:)+qphon(2)
828      kpt_with_shift(3,:)=kpt_with_shift(3,:)+qphon(3)
829    end if
830    if (dtsets(idtset)%usewvl == 0) then
831      call getmpw(ecut_eff,exchn2n3d,gmet,istwfk,kpt_with_shift,mpi_enregs(idtset),mpw,nkpt)
832      ! Allocate tables for parallel IO of the wavefunctions.
833      if( xmpi_mpiio==1 .and. mpi_enregs(idtset)%paral_kgb == 1 .and. &
834 &     any(dtsets(idtset)%iomode == [IO_MODE_MPI, IO_MODE_ETSF])) then
835        ABI_ALLOCATE(mpi_enregs(idtset)%my_kgtab,(mpw,dtsets(idtset)%mkmem))
836      end if
837    else
838      mpw = 0
839    end if
840 
 841 !  In the RF case, the dimensioning should be done only with mpw,
 842 !  but mpw is also used in the first part of respfn.f, so it should at least
 843 !  be equal to mpw_k. The chosen coding is not optimal, only convenient:
 844 !  it leads to a small waste of memory.
845    if(response/=0 .and. mpw_k>mpw)mpw=mpw_k
846    dtsets(idtset)%ngfft(:)=ngfft(:)
847 
848 !  Initialize ngfftc to the initial guess for the coarse mesh
849    ngfftc(:) = 2
850 
851 !  In case of PAW, compute fine FFT parameters
852    if (usepaw==1) then
853      ecutdg_eff=dtsets(idtset)%pawecutdg*dtsets(idtset)%dilatmx**2
854      ngfftdg(:)=dtsets(idtset)%ngfftdg(:)
855      call wrtout(std_out,'getng is called for the fine grid:','COLL')
856 !    Start with the coarse mesh as an initial guess for the fine mesh
857 !    This ensures that the fine mesh will not be any coarser than the coarse mesh in each dimension
858      ngfftc(:) = ngfft(1:3)
859      kpt=k0; if (response==1.and.usepaw==1) kpt=qphon  ! this is temporary
860 
861      call getng(dtsets(idtset)%bxctmindg,ecutdg_eff,gmet,kpt,me_fft,mgfftdg,&
862 &     nfftdg,ngfftdg,nproc_fft,nsym,paral_fft,symrel,ngfftc,&
863 &     use_gpu_cuda=dtsets(idtset)%use_gpu_cuda)
864 
865      dtsets(idtset)%ngfftdg(:)=ngfftdg(:)
866      dtsets(idtset)%mgfftdg=mgfftdg
867      dtsets(idtset)%nfftdg=nfftdg
868 !    Compute fft distribution for fine grid
869      fftalg=ngfft(7); fftalga=fftalg/100; fftalgc=mod(fftalg,10)
870      call init_distribfft(mpi_enregs(idtset)%distribfft,'f', mpi_enregs(idtset)%nproc_fft,ngfftdg(2),ngfftdg(3))
871    end if
872 
873    dtsets(idtset)%mpw=mpw
874    ABI_DEALLOCATE(symrel)
875    ABI_DEALLOCATE(istwfk)
876    ABI_DEALLOCATE(kpt_with_shift)
877    ABI_DEALLOCATE(intarr)
878    ABI_DEALLOCATE(dprarr)
879 
880 !  Initialize data for the parallelization over atomic sites (PAW)
881    if (dtsets(idtset)%natom==1) dtsets(idtset)%paral_atom=0
882    if (dtsets(idtset)%usepaw==0) dtsets(idtset)%paral_atom=0
883    if (dtsets(idtset)%usewvl/=0) dtsets(idtset)%paral_atom=0
884    if (dtsets(idtset)%usedmft==1) dtsets(idtset)%paral_atom=0
885    if (optdriver/=RUNL_GSTATE.and.optdriver/=RUNL_RESPFN.and.optdriver/=RUNL_GWLS) dtsets(idtset)%paral_atom=0
886    if (dtsets(idtset)%macro_uj/=0) dtsets(idtset)%paral_atom=0
887 
888    call initmpi_atom(dtsets(idtset),mpi_enregs(idtset))
889 
 890 !  When a GPU (CUDA) is used, some defaults can change
 891 !  according to a threshold on matrix sizes
892    if (dtsets(idtset)%use_gpu_cuda==1.or.dtsets(idtset)%use_gpu_cuda==-1) then
893      if (optdriver==RUNL_GSTATE.or.optdriver==RUNL_GWLS) then
894        vectsize=dtsets(idtset)%mpw*dtsets(idtset)%nspinor/dtsets(idtset)%npspinor
895        if (all(dtsets(idtset)%istwfk(:)==2)) vectsize=2*vectsize
896        blocksize=dtsets(idtset)%npband*dtsets(idtset)%bandpp
897        if (dtsets(idtset)%paral_kgb==0) blocksize=dtsets(idtset)%npfft
898        if ((vectsize*blocksize**2)>=dtsets(idtset)%gpu_linalg_limit) then
899          if (.not.wfoptalg_read) then
900            dtsets(idtset)%wfoptalg=14
901            if (.not.fftalg_read) then
902              dtsets(idtset)%ngfft(7) = fftalg_for_npfft(dtsets(idtset)%npfft)
903              if (usepaw==1) dtsets(idtset)%ngfftdg(7) = fftalg_for_npfft(dtsets(idtset)%npfft)
904            end if
905            if (.not.ortalg_read) dtsets(idtset)%ortalg=-abs(dtsets(idtset)%ortalg)
906          end if
907        end if
908      end if
909    end if
910 
 911 !  Initialize data for the wavelet (WVL) parallelization:
912    if(dtsets(idtset)%usewvl==1) then
913      mpi_enregs(idtset)%comm_wvl=mpi_enregs(idtset)%comm_cell
914      mpi_enregs(idtset)%nproc_wvl=xmpi_comm_size(mpi_enregs(idtset)%comm_wvl)
915      mpi_enregs(idtset)%me_wvl=xmpi_comm_rank(mpi_enregs(idtset)%comm_wvl)
916    end if
917 
918  end do
919 
920 !This is not a very clean exit in case of paral_kgb<0
921  if (iexit/=0)then
922    MSG_ERROR_NODUMP("aborting now")
923  end if
924 
925  DBG_EXIT("COLL")
926 
927 end subroutine mpi_setup