TABLE OF CONTENTS


ABINIT/m_slk [ Modules ]

[ Top ] [ Modules ]

NAME

 m_slk

FUNCTION

  High-level objects and wrappers around the ScaLAPACK and ELPA APIs.

COPYRIGHT

 Copyright (C) 2004-2024 ABINIT group (CS,GZ,FB,MG,MT)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

16 #if defined HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include "abi_common.h"
21 
22 module m_slk
23 
24  use defs_basis
25  use m_xmpi
26  use m_errors
27  use m_abicore
28 #ifdef HAVE_LINALG_ELPA
29  use m_elpa
30 #endif
31 #ifdef HAVE_MPI2
32  use mpi
33 #endif
34 
35  use m_fstrings,      only : firstchar, toupper, itoa, sjoin, ltoa
36  use m_time,          only : cwtime, cwtime_report
37  use m_numeric_tools, only : blocked_loop !, print_arr
38 
39  implicit none
40 
41 #ifdef HAVE_MPI1
42  include 'mpif.h'
43 #endif
44 
45  private
46 
47  ! ScaLAPACK array descriptor.
48  integer,private,parameter :: DLEN_ = 9    ! descriptor length
49  integer,private,parameter :: Dtype_ = 1   ! descriptor type
50  integer,private,parameter :: CTXT_ = 2    ! BLACS context
51  integer,private,parameter :: M_ = 3       ! number of global rows
52  integer,private,parameter :: N_ = 4       ! number of global columns
53  integer,private,parameter :: MB_ = 5      ! number of rows of a block
54  integer,private,parameter :: NB_ = 6      ! number of columns of a block
55  integer,private,parameter :: RSRC_ = 7    ! process row holding the first matrix row
56  integer,private,parameter :: CSRC_ = 8    ! process column holding the first matrix column
57  integer,private,parameter :: LLD_ = 9     ! leading dimension of the local buffer (local number of rows)
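
 These named constants index the entries of the standard 9-element ScaLAPACK
 descriptor. A minimal sketch of how such a descriptor is typically inspected
 (illustrative only; `desc` is assumed to have been filled by DESCINIT):

   integer :: desc(DLEN_)
   ! ... desc filled by DESCINIT ...
   ! desc(M_)   -> global number of rows
   ! desc(N_)   -> global number of columns
   ! desc(MB_)  -> row block size
   ! desc(NB_)  -> column block size
   ! desc(LLD_) -> leading dimension of the local buffer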

m_slk/basemat_t [ Types ]

[ Top ] [ m_slk ] [ Types ]

NAME

  basemat_t

FUNCTION

  Base class for scalapack matrices

SOURCE

150  type,private :: basemat_t
151 
152    integer :: sizeb_local(2) = -1
153     ! dimensions of the local buffer
154 
155    integer :: sizeb_global(2) = -1
156     ! dimensions of the global matrix
157 
158    integer :: sizeb_blocs(2) = -1
159     ! size of the block of consecutive data
160 
161    integer :: istwf_k = -1
162 
163    type(processor_scalapack),pointer :: processor => null()
164 
165    type(descript_scalapack) :: descript
166 
167  contains
168 
169    procedure :: init => init_matrix_scalapack
170     ! Constructor
171 
172    procedure :: glob2loc => slk_glob2loc
173     ! Determine the local indices of an element from its global indices and return the haveit boolean flag.
174 
175    procedure :: loc2glob => slk_matrix_loc2glob
176     ! Return global indices of a matrix element from the local indices.
177 
178    procedure :: loc2grow => slk_matrix_loc2grow
179     ! Determine the global row index from the local index
180 
181    procedure :: loc2gcol => slk_matrix_loc2gcol
182     ! Determine the global column index from the local index
183 
184    procedure :: locmem_mb => locmem_mb
185     ! Return memory allocated for the local buffer in Mb.
186 
187    procedure :: print => slkmat_print
188     ! Print info on the object.
189 
190    procedure :: check_local_shape => slkmat_check_local_shape
191     ! Debugging tool to test the local shape `lshape` of the local buffer.
192 
193    procedure :: free => matrix_scalapack_free
194     ! Free memory
195 
196    procedure :: change_size_blocs => slk_change_size_blocs
197     ! Change the block sizes, return new object.
198 
199    procedure :: get_trace => slk_get_trace
200     ! Compute the trace of an N-by-N distributed matrix.
201 
202    procedure :: set_imag_diago_to_zero => slk_set_imag_diago_to_zero
203     ! Set the imaginary part of the diagonal to zero.
204 
205    procedure :: invert => slk_invert
206     ! Inverse of a complex matrix.
207 
208  end type basemat_t

m_slk/block_dist_1d [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  block_dist_1d

FUNCTION

  Return block size for one-dimensional block column (row) distribution.
  Mainly used to assign blocks of contiguous columns (rows) of a matrix to successive processes
  when a 1d grid is employed.

  It is usually interfaced with CPP macros, e.g.:

    ABI_CHECK(block_dist_1d(mat_size, nproc, block_size, msg), msg)

INPUTS

  mat_size=Size of the matrix (either number of rows or number of columns)
  nproc=Number of processes in the 1D ScaLAPACK grid

OUTPUT

  ok= Boolean flag with exit status (idle processes are not allowed).
  block_size=Size of the block along this axis needed for one-dimensional block distribution
  msg=Error message (if not ok)

SOURCE

1327 logical function block_dist_1d(mat_size, nproc, block_size, msg) result (ok)
1328 
1329 !Arguments ------------------------------------
1330  integer, intent(in) :: mat_size, nproc
1331  integer,intent(out) :: block_size
1332  character(len=*),intent(out) :: msg
1333 
1334 ! *********************************************************************
1335 
1336  ok = .True.; msg = ""
1337  !block_size = 1; return
1338 
1339  block_size = mat_size / nproc
1340  if (block_size == 0) then
1341    ok = .False.
1342    write(msg, "(2(a,i0), 2a)") &
1343      "The number of MPI processors: ", nproc, " exceeeds the number of rows (columms) of the matrix: ", mat_size, ch10, &
1344      "Decrease the number of MPI processes for the scalapack level."
1345    return
1346  end if
1347 
1348  if (mod(mat_size, nproc) /= 0) block_size = block_size + 1
1349 
1350 end function block_dist_1d
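
 As a worked example of the logic above: with mat_size = 10 and nproc = 4,
 block_size = 10/4 = 2 and, since mod(10,4) /= 0, it is bumped to 3, so the
 columns (rows) are assigned as 1:3, 4:6, 7:9 and 10:10, the last process
 getting the remainder. A hedged usage sketch (error handling simplified):

   logical :: ok
   integer :: block_size
   character(len=500) :: msg

   ok = block_dist_1d(10, 4, block_size, msg)  ! -> ok = .true., block_size = 3
   ok = block_dist_1d(3, 4, block_size, msg)   ! -> ok = .false. (one process would be idle)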

m_slk/build_processor_scalapack [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  build_processor_scalapack

FUNCTION

  Builds a ScaLAPACK processor descriptor, i.e. the data
  related to one processor of a grid.

INPUTS

  grid= array representing the grid of processors.
  myproc= selected processor
  comm= MPI communicator

OUTPUT

  processor= descriptor of a processor

SOURCE

470 subroutine build_processor_scalapack(processor, grid, myproc, comm)
471 
472 !Arguments ------------------------------------
473  class(processor_scalapack),intent(inout) :: processor
474  integer,intent(in) :: myproc,comm
475  class(grid_scalapack),intent(in) :: grid
476 
477 ! *********************************************************************
478 
479  processor%grid = grid
480  processor%myproc = myproc
481  processor%comm = comm
482 
483 #ifdef HAVE_LINALG_SCALAPACK
484  call BLACS_GRIDINFO(grid%ictxt, processor%grid%dims(1), processor%grid%dims(2), &
485                      processor%coords(1), processor%coords(2))
486 #endif
487 
488  ! These values are the same as those computed by BLACS_GRIDINFO
489  ! except in the case where the myproc argument is not the local proc
490  processor%coords(1) = INT((myproc) / grid%dims(2))
491  processor%coords(2) = MOD((myproc), grid%dims(2))
492 
493 end subroutine build_processor_scalapack
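
 As a worked example of the two formulas above: on a 2 x 3 grid with row-major
 ordering, i.e. grid%dims = (/2, 3/), the rank myproc = 4 is mapped to
 coords = (INT(4/3), MOD(4,3)) = (1, 1), that is the second process row and
 the second process column (coordinates are zero-based).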

m_slk/compute_eigen1 [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  compute_eigen1

FUNCTION

  Calculation of eigenvalues and eigenvectors: A * X = lambda * X. Complex and real cases.

INPUTS

  comm= MPI communicator
  processor= descriptor of a processor
  cplex= 1 if the matrix is real, 2 if complex
  nbli_global= number of rows of the global matrix
  nbco_global= number of columns of the global matrix
  istwf_k= 2 if we have a real matrix else complex.
  [use_gpu_elpa]= Flag to activate the use of GPU (ELPA only)

OUTPUT

  vector= eigenvalues of the matrix

SIDE EFFECTS

  matrix= on entry, the matrix to diagonalize; on exit, the eigenvectors,
          gathered on all processors of comm.

SOURCE

3252 subroutine compute_eigen1(comm,processor,cplex,nbli_global,nbco_global,matrix,vector,istwf_k,&
3253 &                         use_gpu_elpa) ! Optional argument
3254 
3255 !Arguments ------------------------------------
3256 !scalars
3257  integer,intent(in) :: comm
3258  integer,intent(in) :: cplex,nbli_global,nbco_global
3259  integer,intent(in) :: istwf_k
3260  class(processor_scalapack),intent(in) :: processor
3261  integer,intent(in),optional :: use_gpu_elpa
3262 !arrays
3263  real(dp),intent(inout) :: matrix(cplex*nbli_global,nbco_global)
3264  real(dp),intent(inout) :: vector(:)
3265 
3266 !Local variables-------------------------------
3267 #ifdef HAVE_LINALG_ELPA
3268  integer :: i,j
3269 #endif
3270  integer :: ierr,use_gpu_elpa_
3271  type(matrix_scalapack) :: sca_matrix1
3272  type(matrix_scalapack) :: sca_matrix2
3273  real(dp),allocatable :: r_tmp_evec(:,:)
3274  complex(dpc),allocatable :: z_tmp_evec(:,:)
3275 
3276 ! *************************************************************************
3277 
3278  use_gpu_elpa_=0
3279 #ifdef HAVE_LINALG_ELPA
3280  if (present(use_gpu_elpa)) use_gpu_elpa_=use_gpu_elpa
3281 #endif
3282 
3283  ! ================================
3284  ! INITIALISATION SCALAPACK MATRIX
3285  ! ================================
3286  call sca_matrix1%init(nbli_global,nbco_global,processor,istwf_k)
3287  call sca_matrix2%init(nbli_global,nbco_global,processor,istwf_k)
3288 
3289  ! ==============================
3290  ! FILLING SCALAPACK MATRIX
3291  ! ==============================
3292  if ( istwf_k /= 2 ) then
3293    ABI_CHECK_IEQ(cplex, 2, "cplex != 2")
3294    ABI_MALLOC(z_tmp_evec,(nbli_global,nbco_global))
3295    z_tmp_evec=cmplx(0._DP,0._DP)
3296 #ifdef HAVE_LINALG_ELPA
3297    ! The full matrix must be set (not only one half like in scalapack).
3298    do j=1,nbco_global
3299       do i=j+1,nbli_global
3300          matrix(2*(i-1)+1,j) = matrix(2*(j-1)+1,i)
3301          matrix(2*(i-1)+2,j) = -matrix(2*(j-1)+2,i)
3302       end do
3303    end do
3304 #endif
3305    call matrix_from_complexmatrix(sca_matrix1,matrix,istwf_k)
3306  else
3307    ABI_CHECK_IEQ(cplex, 1, "cplex != 1")
3308    ABI_MALLOC(r_tmp_evec,(nbli_global,nbco_global))
3309    r_tmp_evec(:,:)=0._DP
3310 #ifdef HAVE_LINALG_ELPA
3311    ! The full matrix must be set (not only one half like in scalapack).
3312    do j=1,nbco_global
3313       do i=j+1,nbli_global
3314          matrix(i,j) = matrix(j,i)
3315       end do
3316    end do
3317 #endif
3318    call matrix_from_realmatrix(sca_matrix1,matrix,istwf_k)
3319  endif
3320 
3321  ! ================================
3322  ! COMPUTE EIGEN VALUES AND VECTORS : A * X = lambda  * X
3323  ! ================================
3324  call compute_eigen_problem(processor,sca_matrix1, sca_matrix2,vector, comm,istwf_k, &
3325 &                           use_gpu_elpa=use_gpu_elpa_)
3326 
3327  ! ==============================
3328  ! CONCATENATE EIGEN VECTORS
3329  ! ==============================
3330 #ifdef HAVE_MPI
3331  if (istwf_k /= 2) then
3332    call matrix_to_complexmatrix(sca_matrix2,z_tmp_evec,istwf_k)
3333    call MPI_ALLREDUCE(z_tmp_evec, matrix, nbli_global*nbco_global, MPI_DOUBLE_complex, MPI_SUM,comm,ierr)
3334  else
3335    call matrix_to_realmatrix(sca_matrix2,r_tmp_evec,istwf_k)
3336    call MPI_ALLREDUCE(r_tmp_evec, matrix, nbli_global*nbco_global, MPI_DOUBLE_PRECISION, MPI_SUM,comm,ierr)
3337  endif
3338 #endif
3339 
3340  ! ====================================
3341  ! DESTRUCTION SCALAPACK AND TMP MATRICES
3342  ! ====================================
3343  call sca_matrix1%free()
3344  call sca_matrix2%free()
3345 
3346  ABI_SFREE(z_tmp_evec)
3347  ABI_SFREE(r_tmp_evec)
3348 
3349 #ifndef HAVE_LINALG_ELPA
3350  ABI_UNUSED(use_gpu_elpa)
3351 #endif
3352 
3353 end subroutine compute_eigen1
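
 A hedged usage sketch (names are illustrative; assumes an already initialized
 processor_scalapack object `proc`): for a complex n x n matrix, cplex = 2 and
 the global matrix is stored as a real array of shape (2*n, n); on return,
 `amat` holds the eigenvectors and `eigvals` the eigenvalues:

   real(dp) :: amat(2*n, n), eigvals(n)

   ! ... fill amat with the (hermitian) matrix ...
   call compute_eigen1(comm, proc, 2, n, n, amat, eigvals, 1)  ! istwf_k = 1: complex case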

m_slk/compute_eigen2 [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  compute_eigen2

FUNCTION

  Calculation of eigenvalues and eigenvectors: A * X = lambda * B * X
  complex and real cases.

INPUTS

  comm= MPI communicator
  processor= descriptor of a processor
  cplex= 1 if matrices are real, 2 if complex
  nbli_global= number of rows of the global matrices
  nbco_global= number of columns of the global matrices
  istwf_k= 2 if we have real matrices else complex.
  [use_gpu_elpa]= Flag to activate the use of GPU (ELPA only)

OUTPUT

  vector= eigenvalues of the generalized problem

SIDE EFFECTS

  matrix1= on entry, the A matrix; on exit, the eigenvectors, gathered on all processors of comm.
  matrix2= the B matrix (symmetrized in-place when ELPA is used).

SOURCE

3383 subroutine compute_eigen2(comm,processor,cplex,nbli_global,nbco_global,matrix1,matrix2,vector,istwf_k, &
3384 &                         use_gpu_elpa) ! Optional argument
3385 
3386 !Arguments ------------------------------------
3387 !scalars
3388  integer,intent(in) :: cplex,nbli_global,nbco_global
3389  integer,intent(in) :: comm
3390  integer,intent(in) :: istwf_k
3391  class(processor_scalapack),intent(in) :: processor
3392  integer,optional,intent(in) :: use_gpu_elpa
3393 !arrays
3394  real(dp),intent(inout) :: matrix1(cplex*nbli_global,nbco_global)
3395  real(dp),intent(inout) :: matrix2(cplex*nbli_global,nbco_global)
3396  real(dp),intent(inout) :: vector(:)
3397 
3398 !Local variables-------------------------------
3399 #ifdef HAVE_LINALG_ELPA
3400  integer :: i,j
3401 #endif
3402  integer :: ierr,use_gpu_elpa_
3403  type(matrix_scalapack) :: sca_matrix1, sca_matrix2, sca_matrix3
3404  real(dp),allocatable :: r_tmp_evec(:,:)
3405  complex(dpc),allocatable :: z_tmp_evec(:,:)
3406 
3407 ! *************************************************************************
3408 
3409  use_gpu_elpa_=0
3410 #if defined HAVE_LINALG_ELPA
3411  if (present(use_gpu_elpa)) use_gpu_elpa_=use_gpu_elpa
3412 #endif
3413 
3414  ! ================================
3415  ! INITIALISATION SCALAPACK MATRIX
3416  ! ================================
3417  call sca_matrix1%init(nbli_global,nbco_global,processor,istwf_k)
3418  call sca_matrix2%init(nbli_global,nbco_global,processor,istwf_k)
3419  call sca_matrix3%init(nbli_global,nbco_global,processor,istwf_k)
3420 
3421  ! ==============================
3422  ! FILLING SCALAPACK MATRIX
3423  ! ==============================
3424  if ( istwf_k /= 2 ) then
3425    ABI_CHECK_IEQ(cplex, 2, "cplex != 2")
3426    ABI_MALLOC(z_tmp_evec,(nbli_global,nbco_global))
3427    z_tmp_evec=cmplx(0._DP,0._DP)
3428 #ifdef HAVE_LINALG_ELPA
3429    ! The full matrix must be set (not only one half like in scalapack).
3430    do j=1,nbco_global
3431       do i=j+1,nbli_global
3432          matrix1(2*(i-1)+1,j) = matrix1(2*(j-1)+1,i)
3433          matrix1(2*(i-1)+2,j) = -matrix1(2*(j-1)+2,i)
3434          matrix2(2*(i-1)+1,j) = matrix2(2*(j-1)+1,i)
3435          matrix2(2*(i-1)+2,j) = -matrix2(2*(j-1)+2,i)
3436       end do
3437    end do
3438 #endif
3439    call matrix_from_complexmatrix(sca_matrix1,matrix1,istwf_k)
3440    call matrix_from_complexmatrix(sca_matrix2,matrix2,istwf_k)
3441  else
3442    ABI_CHECK_IEQ(cplex, 1, "cplex != 1")
3443    ABI_MALLOC(r_tmp_evec,(nbli_global,nbco_global))
3444    r_tmp_evec(:,:)=0._DP
3445 #ifdef HAVE_LINALG_ELPA
3446    ! The full matrix must be set (not only one half like in scalapack).
3447    do j=1,nbco_global
3448       do i=j+1,nbli_global
3449          matrix1(i,j) = matrix1(j,i)
3450          matrix2(i,j) = matrix2(j,i)
3451       end do
3452    end do
3453 #endif
3454    call matrix_from_realmatrix(sca_matrix1,matrix1,istwf_k)
3455    call matrix_from_realmatrix(sca_matrix2,matrix2,istwf_k)
3456  endif
3457 
3458  ! ================================
3459  ! COMPUTE EIGEN VALUES AND VECTORS : A * X = lambda * B * X
3460  ! ================================
3461  call compute_generalized_eigen_problem(processor,sca_matrix1,sca_matrix2,&
3462 &             sca_matrix3,vector,comm,istwf_k,use_gpu_elpa=use_gpu_elpa_)
3463 
3464  ! ==============================
3465  ! CONCATENATE EIGEN VECTORS
3466  ! ==============================
3467 #ifdef HAVE_MPI
3468  if ( istwf_k /= 2 ) then
3469    call matrix_to_complexmatrix(sca_matrix3,z_tmp_evec,istwf_k)
3470    call MPI_ALLREDUCE(z_tmp_evec, matrix1, nbli_global*nbco_global, MPI_DOUBLE_complex,&
3471 &    MPI_SUM,comm,ierr)
3472  else
3473    call matrix_to_realmatrix(sca_matrix3,r_tmp_evec,istwf_k)
3474    call MPI_ALLREDUCE(r_tmp_evec, matrix1, nbli_global*nbco_global, MPI_DOUBLE_PRECISION,&
3475 &    MPI_SUM,comm,ierr)
3476  endif
3477 #endif
3478 
3479  ! ====================================
3480  ! DESTRUCTION SCALAPACK AND TMP MATRICES
3481  ! ====================================
3482  call sca_matrix1%free()
3483  call sca_matrix2%free()
3484  call sca_matrix3%free()
3485 
3486 
3487 #ifndef HAVE_LINALG_ELPA
3488  ABI_UNUSED(use_gpu_elpa)
3489 #endif
3490 
3491 end subroutine compute_eigen2

m_slk/compute_eigen_problem [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  compute_eigen_problem

FUNCTION

  Calculation of eigenvalues and eigenvectors: A * X = lambda * X, complex and real cases.

INPUTS

  processor= descriptor of a processor
  matrix= the matrix to process
  comm= MPI communicator
  istwf_k= 2 if we have a real matrix else complex.
  [nev]= Number of eigenvalues needed. Default: full set
  [use_gpu_elpa]= Flag to activate the use of GPU (ELPA only)

OUTPUT

  results= ScaLAPACK matrix coming out of the operation (its global dimensions must be equal to those of matrix,
         even if only a part of the eigenvectors is needed).
  eigen= eigenvalues of the matrix, dimensioned as the global size of the square matrix
         even if only a part of the eigenvalues is needed.

SOURCE

2660 subroutine compute_eigen_problem(processor, matrix, results, eigen, comm, istwf_k, &
2661 &                                nev, use_gpu_elpa) ! Optional arguments
2662 
2663 #ifdef HAVE_LINALG_ELPA
2664   !Arguments ------------------------------------
2665   class(processor_scalapack),intent(in) :: processor
2666   class(matrix_scalapack),intent(inout) :: matrix
2667   class(matrix_scalapack),intent(inout) :: results
2668   DOUBLE PRECISION,intent(inout) :: eigen(:)
2669   integer,intent(in)  :: comm,istwf_k
2670   integer,optional,intent(in) :: nev
2671   integer,optional,intent(in) :: use_gpu_elpa
2672 
2673   !Local variables ------------------------------
2674   type(elpa_hdl_t) :: elpa_hdl
2675   integer :: nev__,use_gpu_elpa_
2676 
2677 !************************************************************************
2678 
2679   nev__ = matrix%sizeb_global(1); if (present(nev)) nev__ = nev
2680   use_gpu_elpa_=0
2681 #ifdef HAVE_LINALG_ELPA
2682   if (present(use_gpu_elpa)) use_gpu_elpa_=use_gpu_elpa
2683 #endif
2684 
2685   call elpa_func_allocate(elpa_hdl,gpu=use_gpu_elpa_)
2686   call elpa_func_set_matrix(elpa_hdl,matrix%sizeb_global(1),matrix%sizeb_blocs(1),nev__,&
2687 &                           matrix%sizeb_local(1),matrix%sizeb_local(2))
2688   call elpa_func_get_communicators(elpa_hdl,processor%comm,processor%coords(1),processor%coords(2))
2689 
2690   if (istwf_k/=2) then
2691     call elpa_func_solve_evp_1stage(elpa_hdl,matrix%buffer_cplx,results%buffer_cplx,eigen,nev__)
2692   else
2693     call elpa_func_solve_evp_1stage(elpa_hdl,matrix%buffer_real,results%buffer_real,eigen,nev__)
2694   end if
2695 
2696   call elpa_func_deallocate(elpa_hdl)
2697 
2698 #else
2699   !Arguments ------------------------------------
2700   class(processor_scalapack),intent(in)       :: processor
2701   class(matrix_scalapack),intent(in)          :: matrix
2702   class(matrix_scalapack),intent(inout)       :: results
2703   DOUBLE PRECISION,intent(inout) :: eigen(:)
2704   integer,intent(in)  :: comm,istwf_k
2705   integer,optional,intent(in) :: nev
2706   integer,optional,intent(in) :: use_gpu_elpa
2707 
2708 #ifdef HAVE_LINALG_SCALAPACK
2709   !Local variables-------------------------------
2710   integer            :: LRWORK,LIWORK,LCWORK,INFO
2711   character(len=500) :: msg
2712 
2713   integer         , dimension(1) :: IWORK_tmp
2714   DOUBLE PRECISION, dimension(1) :: RWORK_tmp
2715   complex(dpc)     , dimension(1) :: CWORK_tmp
2716 
2717   integer         , allocatable  :: IWORK(:)
2718   DOUBLE PRECISION, allocatable  :: RWORK(:)
2719   complex(dpc)     , allocatable  :: CWORK(:)
2720 
2721   integer,          allocatable :: ICLUSTR(:)
2722   integer,          allocatable :: IFAIL(:)
2723   DOUBLE PRECISION, allocatable :: GAP(:)
2724 
2725   DOUBLE PRECISION            :: ABSTOL,ORFAC
2726   integer,          parameter :: IZERO=0
2727 
2728   integer ::  M,NZ,ierr,TWORK_tmp(3),TWORK(3) ! IA,JA,IZ,JZ,
2729   integer :: nev__, il, iu
2730   character(len=1) :: range
2731 
2732   DOUBLE PRECISION, external :: PDLAMCH
2733 
2734 ! *************************************************************************
2735 
2736   ABI_UNUSED(use_gpu_elpa) ! No GPU implementation is using scaLAPACK
2737   nev__ = matrix%sizeb_global(1); range = "A"; il = 0; iu = 0
2738   if (present(nev)) then
2739     nev__ = nev; range = "I"; il = 1; iu = nev
2740   end if
2741 
2742   ! Initialisation
2743   INFO   = 0
2744   ABSTOL = zero
2745   ORFAC  = -1.D+0
2746 
2747   ! Allocation of the variables for the results of the calculations
2748   ABI_MALLOC(IFAIL,(matrix%sizeb_global(2)))
2749   ABI_MALLOC(ICLUSTR,(2*processor%grid%dims(1)*processor%grid%dims(2)))
2750   ABI_MALLOC(GAP,(processor%grid%dims(1)*processor%grid%dims(2)))
2751 
2752   ! Get the size of the work arrays
2753   if (istwf_k/=2) then
2754      call PZHEEVX('V', range, 'U',&
2755 &      matrix%sizeb_global(2),&
2756 &      matrix%buffer_cplx,1,1,matrix%descript%tab, &
2757 &      ZERO,ZERO,il,iu,ABSTOL,&
2758 &      m,nz,eigen,ORFAC, &
2759 &      results%buffer_cplx,1,1,results%descript%tab, &
2760 &      CWORK_tmp,-1,RWORK_tmp,-1,IWORK_tmp,-1,&
2761 &      IFAIL,ICLUSTR,GAP,INFO)
2762   else
2763      call PDSYEVX('V', range, 'U',&
2764 &      matrix%sizeb_global(2),&
2765 &      matrix%buffer_real,1,1,matrix%descript%tab, &
2766 &      ZERO,ZERO,il,iu,ABSTOL,&
2767 &      m,nz,eigen,ORFAC, &
2768 &      results%buffer_real,1,1,results%descript%tab, &
2769 &      RWORK_tmp,-1,IWORK_tmp,-1,&
2770 &      IFAIL,ICLUSTR,GAP,INFO)
2771   end if
2772 
2773   if (INFO/=0) then
2774      write(msg,'(A,I6)') "Problem to compute workspace to use ScaLAPACK, INFO=",INFO
2775      ABI_ERROR(msg)
2776   endif
2777 
2778   TWORK_tmp(1) = IWORK_tmp(1)
2779   TWORK_tmp(2) = INT(RWORK_tmp(1))
2780   TWORK_tmp(3) = INT(real(CWORK_tmp(1)))
2781 
2782   ! Get the maximum size of the work arrays over the communicator comm
2783   call MPI_ALLREDUCE(TWORK_tmp,TWORK,3,MPI_integer,MPI_MAX,comm,ierr)
2784 
2785   LIWORK = TWORK(1)
2786   LRWORK = TWORK(2) + matrix%sizeb_global(2) *(matrix%sizeb_global(2)-1)
2787   LCWORK = TWORK(3)
2788 
2789   ! Allocation of the work arrays
2790   if (LIWORK>0) then
2791     ABI_MALLOC(IWORK,(LIWORK))
2792     IWORK(:) = 0
2793   else
2794     ABI_MALLOC(IWORK,(1))
2795   end if
2796   if (LRWORK>0) then
2797     ABI_MALLOC(RWORK,(LRWORK))
2798     RWORK(:) = 0._dp
2799   else
2800     ABI_MALLOC(RWORK,(1))
2801   end if
2802   if (LCWORK>0) then
2803     ABI_MALLOC(CWORK,(LCWORK))
2804     CWORK(:) = (0._dp,0._dp)
2805   else
2806     ABI_MALLOC(CWORK,(1))
2807   end if
2808 
2809   ! prototype
2810   !call pzheevx(jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il, iu, abstol, m, nz, w,
2811   !             orfac, z, iz, jz, descz, work, lwork, rwork, lrwork, iwork, liwork, ifail, iclustr, gap, info)
2812 
2813   ! Call the calculation routine
2814   if (istwf_k/=2) then
2815     ! write(std_out,*) 'I am using PZHEEVX'
2816     call PZHEEVX('V', range, 'U',&
2817      matrix%sizeb_global(2),&
2818      matrix%buffer_cplx,1,1,matrix%descript%tab, &
2819      ZERO,ZERO,il,iu,ABSTOL,&
2820      m,nz,eigen,ORFAC, &
2821      results%buffer_cplx,1,1,results%descript%tab, &
2822      CWORK,LCWORK,RWORK,LRWORK,IWORK,LIWORK,&
2823      IFAIL,ICLUSTR,GAP,INFO)
2824   else
2825     ! write(std_out,*) ' I am using PDSYEVX'
2826     call PDSYEVX('V', range, 'U',&
2827      matrix%sizeb_global(2),&
2828      matrix%buffer_real,1,1,matrix%descript%tab, &
2829      ZERO,ZERO,il,iu,ABSTOL,&
2830      m,nz,eigen,ORFAC, &
2831      results%buffer_real,1,1,results%descript%tab, &
2832      RWORK,LRWORK,IWORK,LIWORK,&
2833      IFAIL,ICLUSTR,GAP,INFO)
2834   endif
2835 
2836   ! MG: TODO: Recheck the computation of the workspace as I got INFO 2 with a 5x5x5 si supercell.
2837   if (INFO/=0) then
2838     write(msg,'(A,I0)') "Problem to compute eigenvalues and eigenvectors with ScaLAPACK, INFO=",INFO
2839     ABI_ERROR(msg)
2840   endif
2841 
2842   ABI_FREE(IFAIL)
2843   ABI_FREE(ICLUSTR)
2844   ABI_FREE(GAP)
2845 
2846   ABI_SFREE(IWORK)
2847   ABI_SFREE(RWORK)
2848   ABI_SFREE(CWORK)
2849 #endif
2850 #endif
2851   return
2852 
2853 end subroutine compute_eigen_problem
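
 A hedged usage sketch (assumes a build with ScaLAPACK or ELPA and an already
 constructed processor grid; names are illustrative). Note that `evec` and
 `eigvals` must be dimensioned with the global size even when only `nev`
 eigenpairs are requested:

   type(matrix_scalapack) :: amat, evec
   real(dp) :: eigvals(n)

   call amat%init(n, n, proc, istwf_k)
   call evec%init(n, n, proc, istwf_k)
   ! ... fill amat%buffer_cplx (istwf_k /= 2) or amat%buffer_real ...
   call compute_eigen_problem(proc, amat, evec, eigvals, comm, istwf_k, nev=10)
   call amat%free(); call evec%free()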

m_slk/compute_generalized_eigen_problem [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  compute_generalized_eigen_problem

FUNCTION

  Calculation of eigenvalues and eigenvectors of the generalized eigenvalue problem: A * X = lambda * B * X.
  Complex and real cases.

INPUTS

  processor= descriptor of a processor
  matrix1=  A matrix
  matrix2=  B matrix
  comm= MPI communicator
  istwf_k= 2 if we have a real matrix else complex.
  [nev]= Number of eigenvalues needed. Default: full set
  [use_gpu_elpa]= Flag to activate the use of GPU (ELPA only)

OUTPUT

  results= ScaLAPACK matrix coming out of the operation (its global dimensions must be equal to those of matrix,
         even if only a part of the eigenvectors is needed).
  eigen= eigenvalues of the matrix, dimensioned as the global size of the square matrix
         even if only a part of the eigenvalues is needed.

SOURCE

3027 subroutine compute_generalized_eigen_problem(processor,matrix1,matrix2,results,eigen,comm,istwf_k,&
3028 &                                            nev,use_gpu_elpa) ! Optional arguments
3029 
3030 #ifdef HAVE_LINALG_ELPA
3031 !Arguments ------------------------------------
3032   class(processor_scalapack),intent(in)       :: processor
3033   class(matrix_scalapack),intent(in)          :: matrix1,matrix2
3034   class(matrix_scalapack),intent(inout)       :: results
3035   DOUBLE PRECISION,intent(inout) :: eigen(:)
3036   integer,intent(in)  :: comm,istwf_k
3037   integer,optional,intent(in) :: nev
3038   integer,optional,intent(in) :: use_gpu_elpa
3039 !Local
3040   type(matrix_scalapack) :: tmp1, tmp2
3041   integer :: i,n_col, n_row, nev__,use_gpu_elpa__
3042   integer,external :: indxl2g,numroc
3043 
3044   nev__ = matrix1%sizeb_global(2); if (present(nev)) nev__ = nev
3045   use_gpu_elpa__ = 0
3046 #ifdef HAVE_LINALG_ELPA
3047   if (present(use_gpu_elpa)) use_gpu_elpa__ = use_gpu_elpa
3048 #endif
3049 
3050   call tmp1%init(matrix1%sizeb_global(1),matrix1%sizeb_global(2),processor,istwf_k)
3051   call tmp2%init(matrix1%sizeb_global(1),matrix1%sizeb_global(2),processor,istwf_k)
3052 
3053   if (istwf_k/=2) then
3054      call solve_gevp_complex(matrix1%sizeb_global(1), nev__, &
3055 &          matrix1%sizeb_local(1),matrix1%sizeb_local(2),matrix1%sizeb_blocs(1), &
3056 &          matrix1%buffer_cplx,matrix2%buffer_cplx,eigen,results%buffer_cplx, &
3057 &          tmp1%buffer_cplx,tmp2%buffer_cplx, &
3058 &          processor%coords(1),processor%coords(2), &
3059 &          processor%grid%dims(1),processor%grid%dims(2), &
3060 &          matrix1%descript%tab,processor%comm,use_gpu_elpa=use_gpu_elpa__)
3061   else
3062      call solve_gevp_real(matrix1%sizeb_global(1), nev__, &
3063 &          matrix1%sizeb_local(1),matrix1%sizeb_local(2),matrix1%sizeb_blocs(1), &
3064 &          matrix1%buffer_real,matrix2%buffer_real,eigen,results%buffer_real, &
3065 &          tmp1%buffer_real,tmp2%buffer_real, &
3066 &          processor%coords(1),processor%coords(2), &
3067 &          processor%grid%dims(1),processor%grid%dims(2), &
3068 &          matrix1%descript%tab,processor%comm,use_gpu_elpa=use_gpu_elpa__)
3069   end if
3070   call tmp1%free()
3071   call tmp2%free()
3072 
3073 #else
3074 !Arguments ------------------------------------
3075   class(processor_scalapack),intent(in)       :: processor
3076   class(matrix_scalapack),intent(in)          :: matrix1,matrix2
3077   class(matrix_scalapack),intent(inout)       :: results
3078   DOUBLE PRECISION,intent(inout) :: eigen(:)
3079   integer,intent(in)  :: comm,istwf_k
3080   integer,optional,intent(in) :: nev
3081   integer,optional,intent(in) :: use_gpu_elpa
3082 
3083 #ifdef HAVE_LINALG_SCALAPACK
3084 !Local variables-------------------------------
3085   integer            :: LRWORK,LIWORK,LCWORK,INFO
3086   character(len=500) :: msg
3087   integer         , dimension(1) :: IWORK_tmp
3088   DOUBLE PRECISION, dimension(1) :: RWORK_tmp
3089   complex(dpc)     , dimension(1) :: CWORK_tmp
3090 
3091   integer         , allocatable  :: IWORK(:)
3092   DOUBLE PRECISION, allocatable  :: RWORK(:)
3093   complex(dpc)     , allocatable  :: CWORK(:)
3094   integer,          allocatable :: ICLUSTR(:)
3095   integer,          allocatable :: IFAIL(:)
3096   DOUBLE PRECISION, allocatable :: GAP(:)
3097   DOUBLE PRECISION            :: ABSTOL,ORFAC
3098   integer         , parameter :: IZERO=0
3099   integer ::  M,NZ,ierr,TWORK_tmp(3),TWORK(3) ! IA,JA,IZ,JZ,
3100   character(len=1) :: range
3101   integer :: nev__, il, iu
3102   DOUBLE PRECISION, external :: PDLAMCH
3103 
3104 ! *************************************************************************
3105 
3106   ABI_UNUSED(use_gpu_elpa) ! No GPU implementation is using scaLAPACK
3107   nev__ = matrix1%sizeb_global(2); range = "A"; il = 0; iu = 0
3108   if (present(nev)) then
3109     nev__ = nev; range = "I"; il = 1; iu = nev
3110   end if
3111 
3112   ! Initialisation
3113   INFO   = 0
3114   ABSTOL = zero
3115   ORFAC  = -1.D+0
3116 
3117   ! Allocate the arrays for the results of the calculation
3118   ABI_MALLOC(IFAIL  ,(matrix1%sizeb_global(2)))
3119   ABI_MALLOC(ICLUSTR,(2*processor%grid%dims(1)*processor%grid%dims(2)))
3120   ABI_MALLOC(GAP    ,(  processor%grid%dims(1)*processor%grid%dims(2)))
3121 
3122   ! Get the size of the work arrays
3123   if (istwf_k /= 2) then
3124      call PZHEGVX(1, 'V', range, 'U',&
3125 &      matrix1%sizeb_global(2),&
3126 &      matrix1%buffer_cplx,1,1,matrix1%descript%tab, &
3127 &      matrix2%buffer_cplx,1,1,matrix2%descript%tab, &
3128 &      ZERO,ZERO,il,iu,ABSTOL,&
3129 &      m,nz,eigen,ORFAC, &
3130 &      results%buffer_cplx,1,1,results%descript%tab, &
3131 &      CWORK_tmp,-1,RWORK_tmp,-1,IWORK_tmp,-1,&
3132 &      IFAIL,ICLUSTR,GAP,INFO)
3133   else
3134      call PDSYGVX(1,'V',range,'U',&
3135 &      matrix1%sizeb_global(2),&
3136 &      matrix1%buffer_real,1,1,matrix1%descript%tab, &
3137 &      matrix2%buffer_real,1,1,matrix2%descript%tab, &
3138 &      ZERO,ZERO,il,iu,ABSTOL,&
3139 &      m,nz,eigen,ORFAC, &
3140 &      results%buffer_real,1,1,results%descript%tab, &
3141 &      RWORK_tmp,-1,IWORK_tmp,-1,&
3142 &      IFAIL,ICLUSTR,GAP,INFO)
3143   endif
3144 
3145   if (INFO/=0) then
3146      write(msg,'(A,I6)') "Problem to compute workspace to use ScaLAPACK, INFO=",INFO
3147      ABI_ERROR(msg)
3148   endif
3149 
3150   TWORK_tmp(1) = IWORK_tmp(1)
3151   TWORK_tmp(2) = INT(RWORK_tmp(1)) + matrix1%sizeb_global(2) *(matrix1%sizeb_global(2)-1)
3152   TWORK_tmp(3) = INT(real(CWORK_tmp(1)))
3153 
3154  ! Get the maximum size of the work arrays over the communicator comm
3155   call MPI_ALLREDUCE(TWORK_tmp,TWORK,3,MPI_integer,MPI_MAX,comm,ierr)
3156 
3157   LIWORK = TWORK(1)
3158   LRWORK = TWORK(2)
3159   LCWORK = TWORK(3)
3160 
3161  ! Allocate the work arrays
3162   if (LIWORK>0) then
3163     ABI_MALLOC(IWORK,(LIWORK))
3164     IWORK(:) = 0
3165   else
3166     ABI_MALLOC(IWORK,(1))
3167   end if
3168   if (LRWORK>0) then
3169     ABI_MALLOC(RWORK,(LRWORK))
3170     RWORK(:) = 0._dp
3171   else
3172     ABI_MALLOC(RWORK,(1))
3173   end if
3174   if (LCWORK>0) then
3175     ABI_MALLOC(CWORK,(LCWORK))
3176     CWORK(:) = (0._dp,0._dp)
3177   else
3178     ABI_MALLOC(CWORK,(1))
3179   end if
3180 
3181   ! Call the calculation routine
3182   if (istwf_k/=2) then
3183      ! write(std_out,*) 'I am using PZHEGVX'
3184      call PZHEGVX(1,'V',range,'U',&
3185 &      matrix1%sizeb_global(2),&
3186 &      matrix1%buffer_cplx,1,1,matrix1%descript%tab, &
3187 &      matrix2%buffer_cplx,1,1,matrix2%descript%tab, &
3188 &      ZERO,ZERO,il,iu,ABSTOL,&
3189 &      m,nz,eigen,ORFAC, &
3190 &      results%buffer_cplx,1,1,results%descript%tab, &
3191 &      CWORK,LCWORK,RWORK,LRWORK,IWORK,LIWORK,&
3192 &      IFAIL,ICLUSTR,GAP,INFO)
3193   else
3194      ! write(std_out,*) 'I am using PDSYGVX'
3195      call PDSYGVX(1,'V',range,'U',&
3196 &      matrix1%sizeb_global(2),&
3197 &      matrix1%buffer_real,1,1,matrix1%descript%tab, &
3198 &      matrix2%buffer_real,1,1,matrix2%descript%tab, &
3199 &      ZERO,ZERO,il,iu,ABSTOL,&
3200 &      m,nz,eigen,ORFAC, &
3201 &      results%buffer_real,1,1,results%descript%tab, &
3202 &      RWORK,LRWORK,IWORK,LIWORK,&
3203 &      IFAIL,ICLUSTR,GAP,INFO)
3204   endif
3205 
3206   if (INFO/=0) then
3207      write(msg,'(A,I6)') "Problem to compute eigen problem with ScaLAPACK, INFO=",INFO
3208      ABI_ERROR(msg)
3209   endif
3210 
3211   ABI_FREE(IFAIL)
3212   ABI_FREE(ICLUSTR)
3213   ABI_FREE(GAP)
3214   ABI_SFREE(IWORK)
3215   ABI_SFREE(RWORK)
3216   ABI_SFREE(CWORK)
3217 #endif
3218 #endif
3219   return
3220 
3221 end subroutine compute_generalized_eigen_problem

m_slk/descript_scalapack [ Types ]

[ Top ] [ m_slk ] [ Types ]

NAME

  descript_scalapack

FUNCTION

 ScaLAPACK matrix descriptor.

SOURCE

134  type,public :: descript_scalapack
135    integer :: tab(DLEN_)
136  end type descript_scalapack

m_slk/glob_loc__ [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  glob_loc__

FUNCTION

  Returns the local number of rows (columns), among the first idx global
  rows (columns), owned by this processor: a thin wrapper around ScaLAPACK's
  NUMROC that maps a global index to the corresponding local one.

INPUTS

  matrix= the matrix to process
  idx= global index (row or column)
  lico= 1 for rows, 2 for columns

SOURCE

1558 integer function glob_loc__(matrix, idx, lico)
1559 
1560 !Arguments ------------------------------------
1561  class(basemat_t),intent(in) :: matrix
1562  integer, intent(in) :: idx, lico
1563 
1564 #ifdef HAVE_LINALG_SCALAPACK
1565 !Local variables-------------------------------
1566  integer,external :: NUMROC
1567 
1568 ! *********************************************************************
1569 
1570  glob_loc__ = NUMROC(idx, matrix%sizeb_blocs(lico), &
1571                    matrix%processor%coords(lico), 0, matrix%processor%grid%dims(lico))
1572 #endif
1573 
1574 end function glob_loc__

m_slk/grid_init [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  grid_init

FUNCTION

  Set up the ScaLAPACK grid given the total number of processors.

INPUTS

  nbprocs= total number of processors
  comm= MPI communicator
  use_gpu= whether the GPU is used (relevant for the default matrix block size)
  [grid_dims]=Number of procs for each dimension.

OUTPUT

  grid= the grid of processors used by Scalapack

SOURCE

407 subroutine grid_init(grid, nbprocs, comm, use_gpu, grid_dims)
408 
409 !Arguments ------------------------------------
410  class(grid_scalapack),intent(out) :: grid
411  integer,intent(in) :: nbprocs,comm
412  logical,intent(in) :: use_gpu
413  integer,optional,intent(in) :: grid_dims(2)
414 
415 !Local variables-------------------------------
416  integer :: i
417 
418 ! *********************************************************************
419 
420  grid%nbprocs = nbprocs
421 
422  if (.not. present(grid_dims)) then
423    ! Search for a rectangular grid of processors
424    i=INT(SQRT(float(nbprocs)))
425    do while (MOD(nbprocs,i) /= 0)
426      i = i-1
427    end do
428    i=max(i,1)
429 
430    grid%dims(1) = i
431    grid%dims(2) = INT(nbprocs/i)
432 
433  else
434    grid%dims = grid_dims
435  end if
436 
437  ABI_CHECK(product(grid%dims) == nbprocs, sjoin("grid%dims:", ltoa(grid%dims), "does not agree with nprocs:", itoa(nbprocs)))
438 
439  grid%ictxt = comm
440  grid%use_gpu = use_gpu
441 
442 #ifdef HAVE_LINALG_SCALAPACK
443  ! 'R': Use row-major natural ordering
444  call BLACS_GRIDINIT(grid%ictxt, 'R', grid%dims(1), grid%dims(2))
445 #endif
446 
447 end subroutine grid_init
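
 As a worked example of the default search: for nbprocs = 12, i starts at
 int(sqrt(12.)) = 3 and mod(12,3) == 0, hence a 3 x 4 grid; for a prime count
 such as nbprocs = 7 the loop falls through to i = 1 and the degenerate
 1 x 7 grid is used. A hedged call sketch (comm is assumed to be a valid
 MPI communicator):

   type(grid_scalapack) :: grid

   call grid%init(12, comm, use_gpu=.false.)  ! default search -> grid%dims = (/3, 4/)
   ! or impose the shape explicitly:
   !call grid%init(12, comm, .false., grid_dims=[2, 6])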

m_slk/grid_scalapack [ Types ]

[ Top ] [ m_slk ] [ Types ]

NAME

  grid_scalapack

FUNCTION

  Grid of ScaLAPACK processors.

SOURCE

71  type,public :: grid_scalapack
72 
73    integer :: nbprocs = -1
74    ! Total number of processors
75 
76    integer :: dims(2) = -1
77    ! Number of procs for rows/columns
78 
79    integer :: ictxt = xmpi_comm_null
80    ! BLACS context i.e. MPI communicator.
81 
82    logical :: use_gpu = .false.
83    ! Whether the GPU is used; relevant for determining the matrix block size.
84 
85  contains
86    procedure :: init =>  grid_init  ! Set up the processor grid for ScaLAPACK.
87  end type grid_scalapack

m_slk/idx_loc [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  idx_loc

FUNCTION

  Return local indices from global indices, **independently** of the processor.

INPUTS

  matrix= the matrix to process
  i= row in the matrix
  j= column in the matrix

OUTPUT

  iloc= local row of the coefficient
  jloc= local column of the coefficient

SOURCE

1527 subroutine idx_loc(matrix, i, j, iloc, jloc)
1528 
1529 !Arguments ------------------------------------
1530  class(basemat_t),intent(in) :: matrix
1531  integer, intent(in) :: i,j
1532  integer, intent(out) :: iloc,jloc
1533 
1534 ! *********************************************************************
1535 
1536  iloc = glob_loc__(matrix, i, 1)
1537  jloc = glob_loc__(matrix, j, 2)
1538 
1539 end subroutine idx_loc

m_slk/init_matrix_scalapack [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  init_matrix_scalapack

FUNCTION

  Initialisation of a SCALAPACK matrix (each proc initializes its own part of the matrix)

INPUTS

  processor= descriptor of a processor
  nbli_global= total number of lines
  nbco_global= total number of columns
  istwf_k= 2 if we have a real matrix else complex.
  [size_blocs]= custom block sizes. Use -1 to use global size along that direction.
    Useful to distribute only rows or columns. Obviously, [-1, -1] is not allowed.

OUTPUT

  matrix= the matrix to process

SOURCE

591 subroutine init_matrix_scalapack(matrix, nbli_global, nbco_global, processor, istwf_k, size_blocs)
592 
593 !Arguments ------------------------------------
594  class(basemat_t),intent(inout) :: matrix
595  integer,intent(in) :: nbli_global, nbco_global, istwf_k
596  type(processor_scalapack),target,intent(in) :: processor
597  integer,intent(in),optional :: size_blocs(2)
598 
599 #ifdef HAVE_LINALG_SCALAPACK
600 !Local variables-------------------------------
601 #ifdef HAVE_LINALG_ELPA
602  integer, parameter :: DEFAULT_SIZE_BLOCS = 1
603 #else
604  ! As recommended by Intel MKL, a more sensible default than the previous value of 40
605  integer, parameter :: DEFAULT_SIZE_BLOCS = 24
606 #endif
607  ! As recommended by ELPA, which advises distributions as square as possible, using powers of 2
608  integer, parameter :: DEFAULT_SIZE_BLOCS_GPU = 16
609  integer :: info,sizeb
610  integer,external :: NUMROC
611  !character(len=500) :: msg
612 
613 ! *********************************************************************
614 
615  !call matrix%free()
616 
617  sizeb = DEFAULT_SIZE_BLOCS
618  if(processor%grid%use_gpu) sizeb = DEFAULT_SIZE_BLOCS_GPU
619 
620  !Records of the matrix type
621  matrix%processor => processor
622  matrix%sizeb_blocs(1) = MIN(sizeb, nbli_global)
623  matrix%sizeb_blocs(2) = MIN(sizeb, nbco_global)
624 
625 #ifdef HAVE_LINALG_ELPA
626  if(matrix%sizeb_blocs(1) .ne. matrix%sizeb_blocs(2)) then
627     matrix%sizeb_blocs(1) = MIN(matrix%sizeb_blocs(1), matrix%sizeb_blocs(2))
628     matrix%sizeb_blocs(2) = matrix%sizeb_blocs(1)
629  end if
630 #endif
631 
632  ! Use custom block sizes.
633  if (present(size_blocs)) then
634    ABI_CHECK(.not. all(size_blocs == -1), "size_blocs [-1, -1]  is not allowed")
635    if (size_blocs(1) == -1) then
636      matrix%sizeb_blocs(1) = nbli_global
637    else
638      matrix%sizeb_blocs(1) = MIN(size_blocs(1), nbli_global)
639    end if
640    if (size_blocs(2) == -1) then
641      matrix%sizeb_blocs(2) = nbco_global
642    else
643      matrix%sizeb_blocs(2) = MIN(size_blocs(2), nbco_global)
644    end if
645  end if
646 
647  matrix%sizeb_global(1) = nbli_global
648  matrix%sizeb_global(2) = nbco_global
649 
650  ! Size of the local buffer
651  ! NUMROC computes the NUMber of Rows Or Columns of a distributed matrix owned by the process indicated by IPROC.
652  ! NUMROC (n, nb, iproc, isrcproc, nprocs)
653  matrix%sizeb_local(1) = NUMROC(nbli_global, matrix%sizeb_blocs(1), &
654                                 processor%coords(1), 0, processor%grid%dims(1))
655 
656  matrix%sizeb_local(2) = NUMROC(nbco_global,matrix%sizeb_blocs(2), &
657                                 processor%coords(2), 0, processor%grid%dims(2))
658 
659  call idx_loc(matrix, matrix%sizeb_global(1), matrix%sizeb_global(2), &
660               matrix%sizeb_local(1), matrix%sizeb_local(2))
661 
662  ! Initialisation of the SCALAPACK description of the matrix
663  ! (desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
664  call DESCINIT(matrix%descript%tab, nbli_global, nbco_global, &
665                matrix%sizeb_blocs(1), matrix%sizeb_blocs(2), 0, 0, &
666                processor%grid%ictxt, MAX(1, matrix%sizeb_local(1)), info)
667 
668  if (info /= 0) then
669    ABI_ERROR(sjoin("Error while initializing scalapack matrix. info:", itoa(info)))
670  end if
671 
672  ! Allocate local buffer.
673  matrix%istwf_k = istwf_k
674  select type (matrix)
675  class is (matrix_scalapack)
676    if (istwf_k /= 2) then
677      ABI_MALLOC(matrix%buffer_cplx, (matrix%sizeb_local(1), matrix%sizeb_local(2)))
678      matrix%buffer_cplx = czero
679    else
680      ABI_MALLOC(matrix%buffer_real, (matrix%sizeb_local(1), matrix%sizeb_local(2)))
681      matrix%buffer_real = zero
682    end if
683  class is (slkmat_sp_t)
684    if (istwf_k /= 2) then
685      ABI_MALLOC(matrix%buffer_cplx, (matrix%sizeb_local(1), matrix%sizeb_local(2)))
686      matrix%buffer_cplx = czero_sp
687    else
688      ABI_MALLOC(matrix%buffer_real, (matrix%sizeb_local(1), matrix%sizeb_local(2)))
689      matrix%buffer_real = zero_sp
690    end if
691  class default
692    ABI_ERROR("Wrong class")
693  end select
694 #endif
695 
696 end subroutine init_matrix_scalapack
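
 A hedged sketch of the size_blocs option (names are illustrative): to
 distribute only the columns of an n x n matrix over a 1d grid while keeping
 whole columns local, pass -1 along the first axis and, e.g., the block size
 returned by block_dist_1d along the second:

   type(matrix_scalapack) :: mat
   integer :: col_bsize
   character(len=500) :: msg

   ABI_CHECK(block_dist_1d(n, nproc, col_bsize, msg), msg)
   call mat%init(n, n, proc, istwf_k, size_blocs=[-1, col_bsize])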

m_slk/loc_glob__ [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  loc_glob__

FUNCTION

  Determine the global index from a local index (row or column) as a function of a given processor

INPUTS

  matrix= the matrix to process
  proc= descriptor of a processor
  idx= local index (row or column)
  lico= 1 for rows, 2 for columns

SOURCE

1726 integer pure function loc_glob__(matrix, proc, idx, lico)
1727 
1728 !Arguments ------------------------------------
1729  class(basemat_t),intent(in) :: matrix
1730  class(processor_scalapack),intent(in) :: proc
1731  integer, intent(in) :: idx,lico
1732 
1733 !Local variables-------------------------------
1734  integer :: nbcyc, rest, nblocs
1735 
1736 ! *********************************************************************
1737 
1738  nbcyc = INT((idx-1) / matrix%sizeb_blocs(lico))
1739  rest = MOD(idx-1, matrix%sizeb_blocs(lico))
1740  nblocs = nbcyc * proc%grid%dims(lico) + proc%coords(lico)
1741 
1742  loc_glob__ = nblocs * matrix%sizeb_blocs(lico) + rest + 1
1743 
1744 end function loc_glob__
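
 A worked example of the mapping: take sizeb_blocs(lico) = 2, three processes
 along the axis (grid%dims(lico) = 3) and proc%coords(lico) = 1, so that the
 process owns global indices 3, 4, 9, 10, ... For the local index idx = 4:
 nbcyc = int(3/2) = 1, rest = mod(3,2) = 1, nblocs = 1*3 + 1 = 4, hence
 loc_glob__ = 4*2 + 1 + 1 = 10, as expected.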

m_slk/locmem_mb [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  locmem_mb

FUNCTION

  Returns memory allocated for the local buffer in Mb.

SOURCE

710 pure real(dp) function locmem_mb(mat)
711 
712 !Arguments ------------------------------------
713  class(basemat_t),intent(in) :: mat
714 
715 ! *********************************************************************
716 
717  locmem_mb = zero
718  select type (mat)
719  class is (matrix_scalapack)
720    if (allocated(mat%buffer_real)) locmem_mb = product(int(shape(mat%buffer_real))) * dp
721    if (allocated(mat%buffer_cplx)) locmem_mb = product(int(shape(mat%buffer_cplx))) * two * dp
722  class is (slkmat_sp_t)
723    if (allocated(mat%buffer_real)) locmem_mb = product(int(shape(mat%buffer_real))) * sp
724    if (allocated(mat%buffer_cplx)) locmem_mb = product(int(shape(mat%buffer_cplx))) * two * sp
725  end select
726  locmem_mb = locmem_mb * b2Mb
727 
728 end function locmem_mb

m_slk/matrix_from_complexmatrix [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_from_complexmatrix

FUNCTION

  Routine to fill a SCALAPACK matrix from a global matrix (FULL STORAGE MODE)

INPUTS

  istwf_k= 2 if we have a real matrix else complex.
  reference= a complex matrix

SIDE EFFECTS

  matrix= the matrix to process

SOURCE

1919 subroutine matrix_from_complexmatrix(matrix, reference, istwf_k)
1920 
1921 !Arguments ------------------------------------
1922  class(matrix_scalapack),intent(inout) :: matrix
1923  integer,intent(in) :: istwf_k
1924 !arrays
1925  real(dp),intent(in) :: reference(:,:)
1926 
1927 !Local variables-------------------------------
1928  integer :: i,j,iglob,jglob
1929  complex(dpc) :: val
1930 
1931 ! *********************************************************************
1932 
1933  ABI_UNUSED(istwf_k)
1934 
1935  do i=1,matrix%sizeb_local(1)
1936    do j=1,matrix%sizeb_local(2)
1937      call matrix%loc2glob(i, j, iglob, jglob)
1938      val = dcmplx(reference(2*iglob-1, jglob),reference(2*iglob, jglob))
1939      call matrix_set_local_cplx(matrix,i,j,val)
1940    end do
1941  end do
1942 
1943 end subroutine matrix_from_complexmatrix

m_slk/matrix_from_global [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_from_global

FUNCTION

  Routine to fill a SCALAPACK matrix from a global PACKED matrix.

INPUTS

  istwf_k= 2 if we have a real matrix else complex.
  reference= one-dimensional array with packed matrix.

SIDE EFFECTS

  matrix= the matrix to process

SOURCE

1765 subroutine matrix_from_global(matrix, reference, istwf_k)
1766 
1767 !Arguments ------------------------------------
1768  class(matrix_scalapack),intent(inout) :: matrix
1769  integer,intent(in) :: istwf_k
1770  real(dp),intent(in) :: reference(*)
1771 
1772 !Local variables-------------------------------
1773  integer :: i,j,iglob,jglob,ind
1774  real(dp) :: val_real
1775  complex(dp) :: val_cplx
1776 
1777 ! *********************************************************************
1778 
1779  do i=1,matrix%sizeb_local(1)
1780    do j=1,matrix%sizeb_local(2)
1781      call matrix%loc2glob(i, j, iglob, jglob)
1782 
1783      if (istwf_k/=2) then
1784        ind = jglob*(jglob-1)+2*iglob-1
1785        val_cplx = dcmplx(reference(ind),reference(ind+1))
1786        call matrix_set_local_cplx(matrix,i,j,val_cplx)
1787      else
1788        ind = (jglob*(jglob-1))/2 + iglob
1789        val_real = reference(ind)
1790        call matrix_set_local_real(matrix,i,j,val_real)
1791      end if
1792 
1793    end do
1794  end do
1795 
1796 end subroutine matrix_from_global
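
 A worked example of the packed indexing above: the global PACKED matrix stores
 the upper triangle column by column, each complex coefficient occupying two
 consecutive reals, so column jglob starts at offset jglob*(jglob-1) and element
 (iglob, jglob) has its real part at ind = jglob*(jglob-1) + 2*iglob - 1.
 For (iglob, jglob) = (2, 3): ind = 3*2 + 4 - 1 = 9, i.e. reference(9) and
 reference(10) hold the real and imaginary parts. In the real case (istwf_k = 2)
 the offset is halved: ind = jglob*(jglob-1)/2 + iglob.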

m_slk/matrix_from_global_sym [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_from_global_sym

FUNCTION

  Fills a ScaLAPACK matrix from a global PACKED matrix, reconstructing the
  second triangle from the hermitian (real symmetric) character of the matrix.

INPUTS

  istwf_k= 2 if we have a real matrix else complex.
  reference= one-dimensional array

SIDE EFFECTS

  matrix= the matrix to process

SOURCE

1816 subroutine matrix_from_global_sym(matrix, reference, istwf_k)
1817 
1818 !Arguments ------------------------------------
1819  class(matrix_scalapack),intent(inout)  :: matrix
1820  real(dp),intent(in) :: reference(:)
1821  integer,intent(in) :: istwf_k
1822 
1823 !Local variables-------------------------------
1824  integer :: i,j,iglob,jglob,ind
1825  complex(dp):: val_cplx
1826  real(dp) ::val_real
1827 
1828 ! *********************************************************************
1829 
1830  do i=1,matrix%sizeb_local(1)
1831    do j=1,matrix%sizeb_local(2)
1832      call matrix%loc2glob(i,j,iglob,jglob)
1833      if(jglob < iglob) then
1834         ind = iglob*(iglob-1)+2*jglob-1
1835      else
1836         ind = jglob*(jglob-1)+2*iglob-1
1837      end if
1838      if (istwf_k/=2) then
1839         val_cplx = dcmplx(reference(ind),reference(ind+1))
1840         if(jglob < iglob) then
1841            call matrix_set_local_cplx(matrix,i,j,conjg(val_cplx))
1842         else
1843            call matrix_set_local_cplx(matrix,i,j,val_cplx)
1844         end if
1845      else
1846         ind = (ind + 1) / 2
1847         val_real = reference(ind)
1848         call matrix_set_local_real(matrix,i,j,val_real)
1849      end if
1850    end do
1851  end do
1852 
1853 end subroutine matrix_from_global_sym

m_slk/matrix_from_realmatrix [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_from_realmatrix

FUNCTION

  Routine to fill a SCALAPACK matrix from a real global matrix (FULL STORAGE MODE)

INPUTS

  istwf_k= 2 if we have a real matrix else complex.
  reference= a real matrix

SIDE EFFECTS

  matrix= the matrix to process

SOURCE

1874 subroutine matrix_from_realmatrix(matrix, reference, istwf_k)
1875 
1876 !Arguments ------------------------------------
1877  class(matrix_scalapack),intent(inout) :: matrix
1878  integer,intent(in) :: istwf_k
1879 !arrays
1880  real(dp),intent(in) :: reference(:,:)
1881 
1882 !Local variables-------------------------------
1883  integer :: i,j,iglob,jglob
1884  real(dp) :: val
1885 
1886 ! *********************************************************************
1887 
1888  ABI_UNUSED(istwf_k)
1889 
1890  do i=1,matrix%sizeb_local(1)
1891    do j=1,matrix%sizeb_local(2)
1892      call matrix%loc2glob(i, j, iglob, jglob)
1893      val = reference(iglob, jglob)
1894      call matrix_set_local_real(matrix,i,j,val)
1895    end do
1896  end do
1897 
1898 end subroutine matrix_from_realmatrix

m_slk/matrix_get_local_cplx [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_get_local_cplx

FUNCTION

  Returns a local matrix coefficient of complex type,
  addressed by its local indices.

INPUTS

  matrix= the matrix to process
  i= row in the matrix
  j= column in the matrix

OUTPUT

  The value of the local matrix.

SOURCE

1396 pure complex(dpc) function matrix_get_local_cplx(matrix, i, j)
1397 
1398 !Arguments ------------------------------------
1399  class(matrix_scalapack),intent(in) :: matrix
1400  integer, intent(in) :: i,j
1401 
1402 ! *********************************************************************
1403 
1404  matrix_get_local_cplx = matrix%buffer_cplx(i,j)
1405 
1406 end function matrix_get_local_cplx

m_slk/matrix_get_local_real [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_get_local_real

FUNCTION

  Returns a local matrix coefficient of double precision type.

INPUTS

  matrix= the matrix to process
  i= row in the matrix
  j= column in the matrix

SOURCE

1425 pure real(dp) function matrix_get_local_real(matrix,i,j)
1426 
1427 !Arguments ------------------------------------
1428  class(matrix_scalapack),intent(in) :: matrix
1429  integer, intent(in) :: i,j
1430 
1431 ! *********************************************************************
1432 
1433  matrix_get_local_real = matrix%buffer_real(i,j)
1434 
1435 end function matrix_get_local_real

m_slk/matrix_scalapack [ Types ]

[ Top ] [ m_slk ] [ Types ]

NAME

  matrix_scalapack

FUNCTION

  High-level interface to a ScaLAPACK matrix (double precision version).

SOURCE

222  type, public, extends(basemat_t) :: matrix_scalapack
223 
224    real(dp),allocatable :: buffer_real(:,:)
225     ! local part of the (real) matrix.
226     ! The istwf_k option passed to the constructor defines whether we have a real or complex matrix
227 
228    complex(dpc),allocatable :: buffer_cplx(:,:)
229     ! local part of the (complex) matrix
230 
231  contains
232 
233    procedure :: get_head_and_wings => slk_get_head_and_wings
234     ! Return global arrays with the head and the wings of the matrix.
235 
236    procedure :: set_head_and_wings => slk_set_head_and_wings
237     ! Set head and the wings of the matrix starting from global arrays.
238 
239    procedure :: copy => matrix_scalapack_copy
240     ! Copy object
241 
242    procedure :: hpd_invert => slk_hpd_invert
243     ! Inverse of a Hermitian positive definite matrix.
244 
245    procedure :: ptrans => slk_ptrans
246     ! Transpose matrix
247 
248    procedure :: cut => slk_cut
249     ! Extract submatrix and create new matrix with `size_blocs` and `processor`
250 
251    procedure :: take_from => slk_take_from
252     ! Take values from source
253 
254    procedure :: collect_cplx => slk_collect_cplx
255     ! Return on all processors the submatrix of shape (mm, nn) starting at position ija.
256 
257    procedure :: heev => slk_heev
258     ! Compute eigenvalues and, optionally, eigenvectors of a Hermitian matrix A: A * X = lambda * X
259 
260    procedure :: pzheevx => slk_pzheevx
261     ! Compute eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix A: A * X = lambda * X
262 
263    procedure :: pzhegvx => slk_pzhegvx
264     ! Eigenvalues and, optionally, eigenvectors of a complex
265     ! generalized Hermitian-definite eigenproblem, of the form
266     ! sub( A )*x=(lambda)*sub( B )*x,  sub( A )*sub( B )*x=(lambda)*x,
267     ! or sub( B )*sub( A )*x=(lambda)*x.
268 
269    procedure :: symmetrize => slk_symmetrize
270     ! Symmetrizes a square scaLAPACK matrix.
271 
272    procedure :: bsize_and_type => slk_bsize_and_type
273     ! Returns the byte size and the MPI datatype
274 
275  end type matrix_scalapack
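
 A hedged sketch of the typical lifecycle of a matrix_scalapack object
 (assumes an already initialized processor_scalapack `proc`; `aa_glob` is a
 hypothetical global array used to fill the local buffer):

   type(matrix_scalapack) :: amat
   integer :: il, jl, iglob, jglob

   call amat%init(n, n, proc, istwf_k)
   do jl=1,amat%sizeb_local(2)
     do il=1,amat%sizeb_local(1)
       call amat%loc2glob(il, jl, iglob, jglob)
       amat%buffer_cplx(il, jl) = aa_glob(iglob, jglob)  ! istwf_k /= 2 (complex case)
     end do
   end do
   write(std_out,"(a,f8.2)") " local buffer [Mb]: ", amat%locmem_mb()
   call amat%free()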

m_slk/matrix_scalapack_copy [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_scalapack_copy

FUNCTION

  Copy in_mat to out_mat. If empty is True, the values in the local buffer are not copied. Default: False

SOURCE

1056 subroutine matrix_scalapack_copy(in_mat, out_mat, empty)
1057 
1058 !Arguments ------------------------------------
1059  class(matrix_scalapack),intent(in) :: in_mat
1060  class(matrix_scalapack),intent(out) :: out_mat
1061  logical,optional,intent(in) :: empty
1062 
1063 !Local variables-------------------------------
1064  logical :: empty__
1065 
1066 ! *********************************************************************
1067 
1068  call out_mat%init(in_mat%sizeb_global(1), in_mat%sizeb_global(2), in_mat%processor, in_mat%istwf_k, &
1069                    size_blocs=in_mat%sizeb_blocs)
1070 
1071  empty__ = .False.; if (present(empty)) empty__ = empty
1072  if (.not. empty__) then
1073    if (in_mat%istwf_k == 1) then
1074      out_mat%buffer_cplx = in_mat%buffer_cplx
1075    else
1076      out_mat%buffer_real = in_mat%buffer_real
1077    end if
1078  end if
1079 
1080 end subroutine matrix_scalapack_copy

m_slk/matrix_scalapack_free [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_scalapack_free

FUNCTION

  Free dynamic memory

SOURCE

1132 subroutine matrix_scalapack_free(mat)
1133 
1134 !Arguments ------------------------------------
1135  class(basemat_t),intent(inout) :: mat
1136 
1137 ! *********************************************************************
1138 
1139  ! Don't free the grid. Just nullify the pointer as there might be other objects keeping a ref to processor.
1140  mat%processor => null()
1141 
1142  mat%sizeb_global = 0
1143  mat%sizeb_blocs = 0
1144  mat%sizeb_local = 0
1145  mat%descript%tab = 0
1146 
1147  select type (mat)
1148  class is (matrix_scalapack)
1149    ABI_SFREE(mat%buffer_cplx)
1150    ABI_SFREE(mat%buffer_real)
1151  class is (slkmat_sp_t)
1152    ABI_SFREE(mat%buffer_cplx)
1153    ABI_SFREE(mat%buffer_real)
1154  class default
1155    ABI_ERROR("Wrong class")
1156  end select
1157 
1158 end subroutine matrix_scalapack_free

m_slk/matrix_set_local_cplx [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_set_local_cplx

FUNCTION

  Sets a local matrix coefficient of complex type,
  addressed by its local indices.

INPUTS

  i= row in the matrix
  j= column in the matrix
  value= the value to set

SIDE EFFECTS

  matrix%buffer_cplx(i,j) filled with value

SOURCE

1460 pure subroutine matrix_set_local_cplx(matrix,i,j,value)
1461 
1462 !Arguments ------------------------------------
1463  class(matrix_scalapack),intent(inout) :: matrix
1464  integer, intent(in) :: i,j
1465  complex(dp), intent(in) :: value
1466 
1467 ! *********************************************************************
1468 
1469  matrix%buffer_cplx(i,j) = value
1470 
1471 end subroutine matrix_set_local_cplx

m_slk/matrix_set_local_real [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_set_local_real

FUNCTION

  Sets a local matrix coefficient of double precision type.

INPUTS

  i= row in the matrix
  j= column in the matrix
  value= the value to set

SIDE EFFECTS

  matrix%buffer_real(i,j) set to value

SOURCE

1493 pure subroutine matrix_set_local_real(matrix, i, j, value)
1494 
1495 !Arguments ------------------------------------
1496  class(matrix_scalapack),intent(inout) :: matrix
1497  integer, intent(in) :: i,j
1498  real(dp), intent(in) :: value
1499 
1500 ! *********************************************************************
1501 
1502  matrix%buffer_real(i,j) = value
1503 
1504 end subroutine matrix_set_local_real

m_slk/matrix_to_complexmatrix [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_to_complexmatrix

FUNCTION

  Inserts a ScaLAPACK matrix into a complex matrix in FULL STORAGE MODE.

INPUTS

  matrix= the matrix to process
  istwf_k= 2 if the matrix is real, any other value for a complex matrix (not referenced in this routine).

SIDE EFFECTS

  reference= the matrix to fill

SOURCE

2059 subroutine matrix_to_complexmatrix(matrix, reference, istwf_k)
2060 
2061 !Arguments ------------------------------------
2062  integer,intent(in) :: istwf_k
2063  class(matrix_scalapack),intent(in) :: matrix
2064 !arrays
2065  complex(dpc),intent(inout) :: reference(:,:)
2066 
2067 !Local variables-------------------------------
2068  integer  :: i,j,iglob,jglob
2069 
2070 ! *********************************************************************
2071 
2072  ABI_UNUSED(istwf_k)
2073 
2074  do i=1,matrix%sizeb_local(1)
2075    do j=1,matrix%sizeb_local(2)
2076      call matrix%loc2glob(i, j, iglob, jglob)
2077      reference(iglob,jglob) = matrix_get_local_cplx(matrix,i,j)
2078    end do
2079  end do
2080 
2081 end subroutine matrix_to_complexmatrix

m_slk/matrix_to_global [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_to_global

FUNCTION

  Inserts a ScaLAPACK matrix into a global one in PACKED storage mode.

INPUTS

  matrix= the matrix to process
  istwf_k= 2 if the matrix is real, any other value for a complex matrix.
  nband_k= number of bands at this k point (now commented out in the interface).

SIDE EFFECTS

  reference= one-dimensional array holding the global matrix in packed storage mode

SOURCE

1965 subroutine matrix_to_global(matrix, reference, istwf_k)
1966 
1967 !Arguments ------------------------------------
1968  class(matrix_scalapack),intent(in) :: matrix
1969  integer,intent(in) :: istwf_k          !,nband_k
1970  real(dp),intent(inout) :: reference(*) !(nband_k*(nband_k+1))
1971 
1972 !Local variables-------------------------------
1973  integer  :: i,j,iglob,jglob,ind
1974 
1975 ! *********************************************************************
1976 
1977  do i=1,matrix%sizeb_local(1)
1978    do j=1,matrix%sizeb_local(2)
1979      call matrix%loc2glob(i, j, iglob, jglob)
1980 
1981      ind = jglob*(jglob-1)+2*iglob-1
1982      if (ind <= matrix%sizeb_global(2)*(matrix%sizeb_global(2)+1)) then
1983        if (istwf_k/=2) then
1984          reference(ind)   = real(matrix_get_local_cplx(matrix,i,j))
1985          reference(ind+1) = aimag(matrix_get_local_cplx(matrix,i,j))
1986        else
1987          ind = (ind+1)/2 ! real packed storage
1988          reference(ind) = matrix_get_local_real(matrix,i,j)
1989        end if
1990      end if
1991    end do
1992  end do
1993 
1994 end subroutine matrix_to_global

m_slk/matrix_to_realmatrix [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_to_realmatrix

FUNCTION

  Inserts a ScaLAPACK matrix into a real matrix in FULL STORAGE MODE.

INPUTS

  matrix= the matrix to process
  istwf_k= 2 if the matrix is real, any other value for a complex matrix (not referenced in this routine).

SIDE EFFECTS

  reference= the matrix to fill

SOURCE

2015 subroutine matrix_to_realmatrix(matrix, reference, istwf_k)
2016 
2017 !Arguments ------------------------------------
2018  class(matrix_scalapack),intent(in) :: matrix
2019  integer,intent(in) :: istwf_k
2020 !arrays
2021  real(dp),intent(inout) :: reference(:,:)
2022 
2023 !Local variables-------------------------------
2024  integer :: i,j,iglob,jglob
2025  !complex(dpc) :: zvar
2026 
2027 ! *********************************************************************
2028 
2029  ABI_UNUSED(istwf_k)
2030 
2031  do i=1,matrix%sizeb_local(1)
2032    do j=1,matrix%sizeb_local(2)
2033      call matrix%loc2glob(i, j, iglob, jglob)
2034      reference(iglob,jglob) = matrix_get_local_real(matrix,i,j)
2035    end do
2036  end do
2037 
2038 end subroutine matrix_to_realmatrix

m_slk/matrix_to_reference [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  matrix_to_reference

FUNCTION

  Fill a global matrix, stored as a real array with real and imaginary parts unfolded along the rows,
  from a ScaLAPACK distributed matrix.

INPUTS

  matrix= the matrix to process
  istwf_k= 2 if the matrix is real, any other value for a complex matrix.

SIDE EFFECTS

  reference= two-dimensional real array; for complex matrices, consecutive rows store real and imaginary parts.

SOURCE

2102 subroutine matrix_to_reference(matrix, reference, istwf_k)
2103 
2104 !Arguments ------------------------------------
2105  class(matrix_scalapack),intent(in) :: matrix
2106  integer,intent(in) :: istwf_k
2107 !arrays
2108  real(dp),intent(inout) :: reference(:,:)
2109 
2110 !Local variables-------------------------------
2111  integer  :: i,j,iglob,jglob,ind
2112 
2113 ! *********************************************************************
2114 
2115  do i=1,matrix%sizeb_local(1)
2116    do j=1,matrix%sizeb_local(2)
2117      call matrix%loc2glob(i, j, iglob, jglob)
2118 
2119      if (istwf_k/=2) then
2120        ind=(iglob-1)*2+1
2121        reference(ind,jglob)   = real(matrix_get_local_cplx(matrix,i,j))
2122        reference(ind+1,jglob) = aimag(matrix_get_local_cplx(matrix,i,j))
2123      else
2124        ind = iglob
2125        reference(ind,jglob)   = matrix_get_local_real(matrix,i,j)
2126        !reference(ind+1,jglob) = 0._dp
2127      end if
2128 
2129    end do
2130  end do
2131 
2132 end subroutine matrix_to_reference

m_slk/my_locc [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

 my_locc

FUNCTION

  Method of matrix_scalapack wrapping the scaLAPACK tool LOCc.

OUTPUT

  my_locc= For the meaning see NOTES below.

NOTES

  Let K be the number of rows or columns of a distributed matrix, and assume that its process grid has dimension p x q.
  LOCr( K ) denotes the number of elements of K that a process would receive if K were distributed over the p
  processes of its process column.
  Similarly, LOCc( K ) denotes the number of elements of K that a process would receive if K were distributed over
  the q processes of its process row.
  The values of LOCr() and LOCc() may be determined via a call to the ScaLAPACK tool function, NUMROC:
          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
  An upper bound for these quantities may be computed by:
          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
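
  Worked example (illustrative numbers): with N = 1000, NB_A = 32 and NPCOL = 4,
          LOCc( 1000 ) <= ceil( ceil(1000/32)/4 )*32 = ceil( 32/4 )*32 = 8*32 = 256,
  i.e. no process column owns more than 256 columns of the matrix.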

SOURCE

2461 integer function my_locc(mat)
2462 
2463 !Arguments ------------------------------------
2464  class(basemat_t),intent(in) :: mat
2465 
2466 #ifdef HAVE_LINALG_SCALAPACK
2467 !Local variables-------------------------------
2468  integer :: N, NB_A, MYCOL, CSRC_A, NPCOL
2469  integer,external :: NUMROC
2470 
2471 ! *************************************************************************
2472 
2473  N      = mat%descript%tab(N_ )      ! The number of columns in the global matrix.
2474  NB_A   = mat%descript%tab(NB_)      ! The number of columns in a block.
2475  MYCOL  = mat%processor%coords(2)    ! The column index of my processor
2476  CSRC_A = mat%descript%tab(CSRC_)    ! The column of the processors at the beginning.
2477  NPCOL  = mat%processor%grid%dims(2) ! The number of processors per column in the Scalapack grid.
2478 
2479  my_locc = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL )
2480 #endif
2481 
2482 end function my_locc

m_slk/my_locr [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

 my_locr

FUNCTION

  Method of matrix_scalapack wrapping the scaLAPACK tool LOCr.

OUTPUT

  my_locr= For the meaning see NOTES below.

NOTES

  Let K be the number of rows or columns of a distributed matrix, and assume that its process grid has dimension p x q.
  LOCr( K ) denotes the number of elements of K that a process would receive if K were distributed over the p
  processes of its process column.
  Similarly, LOCc( K ) denotes the number of elements of K that a process would receive if K were distributed over
  the q processes of its process row.
  The values of LOCr() and LOCc() may be determined via a call to the ScaLAPACK tool function, NUMROC:
          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
  An upper bound for these quantities may be computed by:
          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A

SOURCE

2410 integer function my_locr(mat)
2411 
2412 !Arguments ------------------------------------
2413  class(basemat_t),intent(in) :: mat
2414 
2415 !Local variables-------------------------------
2416 #ifdef HAVE_LINALG_SCALAPACK
2417  integer :: M, MB_A, MYROW, RSRC_A, NPROW
2418  integer,external :: NUMROC
2419 
2420 ! *************************************************************************
2421 
2422  M      = mat%descript%tab(M_ )      ! The number of rows in the global matrix.
2423  MB_A   = mat%descript%tab(MB_)      ! The number of rows in a block.
2424  MYROW  = mat%processor%coords(1)    ! The row index of my processor
2425  RSRC_A = mat%descript%tab(RSRC_)    ! The row of the processors at the beginning.
2426  NPROW  = mat%processor%grid%dims(1) ! The number of processors per row in the Scalapack grid.
2427 
2428  my_locr = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW )
2429 #endif
2430 
2431 end function my_locr

m_slk/processor_free [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  processor_free

FUNCTION

  Releases the BLACS grid/context associated with this processor descriptor.

SOURCE

552 subroutine processor_free(processor)
553 
554 !Arguments ------------------------------------
555  class(processor_scalapack),intent(inout) :: processor
556 
557 ! *********************************************************************
558 
559 #ifdef HAVE_LINALG_SCALAPACK
560  if (processor%grid%ictxt /= xmpi_comm_null) then
561    call BLACS_GRIDEXIT(processor%grid%ictxt)
562    !call BLACS_EXIT(0)
563  end if
564 #endif
565 
566 end subroutine processor_free

m_slk/processor_init [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  processor_init

FUNCTION

  Initializes an instance of processor ScaLAPACK from an MPI communicator.

INPUTS

  comm= MPI communicator
  [grid_dims]=Number of procs for each dimension.

OUTPUT

  processor= descriptor of a processor

SOURCE

514 subroutine processor_init(processor, comm, grid_dims)
515 
516 !Arguments ------------------------------------
517  class(processor_scalapack),intent(out) :: processor
518  integer, intent(in) :: comm
519  integer,optional,intent(in) :: grid_dims(2)
520 
521 !Local variables-------------------------------
522  type(grid_scalapack) :: grid
523  integer :: nbproc,myproc
524 
525 ! *********************************************************************
526 
527  nbproc = xmpi_comm_size(comm)
528  myproc = xmpi_comm_rank(comm)
529 
530  if (present(grid_dims)) then
531    call grid%init(nbproc, comm, .false., grid_dims=grid_dims)
532  else
533    call grid%init(nbproc, comm, .false.)
534  end if
535 
536  call build_processor_scalapack(processor, grid, myproc, comm)
537 
538 end subroutine processor_init
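
 A minimal sketch of grid setup and teardown, assuming a valid MPI communicator
 `comm` (e.g. xmpi_world):

    type(processor_scalapack) :: proc
    ! Let the module pick the grid shape from the number of ranks in comm ...
    call proc%init(comm)
    ! ... or impose a 2x2 grid explicitly:
    ! call proc%init(comm, grid_dims=[2, 2])
    ! ... create and use distributed matrices here ...
    call proc%free()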

m_slk/processor_scalapack [ Types ]

[ Top ] [ m_slk ] [ Types ]

NAME

  processor_scalapack

FUNCTION

  One processor in the grid.

SOURCE

101  type,public :: processor_scalapack
102 
103    integer :: myproc = -1
104    ! rank of the processor
105 
106    integer :: comm = xmpi_comm_null
107    ! MPI communicator underlying the BLACS grid.
108 
109    integer :: coords(2) = -1
110    ! Coordinates of the processor in the grid.
111 
112    type(grid_scalapack) :: grid
113    ! the grid to which the processor is associated.
114 
115  contains
116    procedure :: init => processor_init        ! Initializes an instance of processor ScaLAPACK from an MPI communicator.
117    procedure :: free => processor_free        ! Free the object
118  end type processor_scalapack
119 
120  private :: build_processor_scalapack     ! Build a ScaLAPACK processor descriptor

m_slk/slk_array1_free [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_array1_free

FUNCTION

  Deallocate 1d array of matrix_scalapack elements

SOURCE

1172 subroutine slk_array1_free(slk_arr1)
1173   class(basemat_t),intent(inout) :: slk_arr1(:)
1174   integer :: i1
1175   do i1=1,size(slk_arr1, dim=1)
1176     call slk_arr1(i1)%free()
1177   end do
1178 end subroutine slk_array1_free

m_slk/slk_array2_free [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_array2_free

FUNCTION

  Deallocate 2d array of matrix_scalapack elements

SOURCE

1192 subroutine slk_array2_free(slk_arr2)
1193   class(basemat_t),intent(inout) :: slk_arr2(:,:)
1194   integer :: i1, i2
1195   do i2=1,size(slk_arr2, dim=2)
1196     do i1=1,size(slk_arr2, dim=1)
1197       call slk_arr2(i1, i2)%free()
1198     end do
1199   end do
1200 end subroutine slk_array2_free

m_slk/slk_array3_free [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_array3_free

FUNCTION

  Deallocate 3d array of matrix_scalapack elements

SOURCE

1214 subroutine slk_array3_free(slk_arr3)
1215   class(basemat_t),intent(inout) :: slk_arr3(:,:,:)
1216   integer :: i1, i2, i3
1217   do i3=1,size(slk_arr3, dim=3)
1218     do i2=1,size(slk_arr3, dim=2)
1219       do i1=1,size(slk_arr3, dim=1)
1220         call slk_arr3(i1, i2, i3)%free()
1221       end do
1222     end do
1223   end do
1224 end subroutine slk_array3_free

m_slk/slk_array4_free [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_array4_free

FUNCTION

  Deallocate 4d array of matrix_scalapack elements

SOURCE

1238 subroutine slk_array4_free(slk_arr4)
1239   class(basemat_t),intent(inout) :: slk_arr4(:,:,:,:)
1240   integer :: i1, i2, i3, i4
1241   do i4=1,size(slk_arr4, dim=4)
1242     do i3=1,size(slk_arr4, dim=3)
1243       do i2=1,size(slk_arr4, dim=2)
1244         do i1=1,size(slk_arr4, dim=1)
1245           call slk_arr4(i1, i2, i3, i4)%free()
1246         end do
1247       end do
1248     end do
1249   end do
1250 end subroutine slk_array4_free

m_slk/slk_array_locmem_mb [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_array_locmem_mb

FUNCTION

  Elemental function to compute the memory allocated for an array of matrix_scalapack elements
  Usage: mem_mb = sum(slk_array_locmem_mb(mat_array))

SOURCE

1295 elemental real(dp) function slk_array_locmem_mb(mat) result(mem_mb)
1296   class(basemat_t),intent(in) :: mat
1297   mem_mb = mat%locmem_mb()
1298 end function slk_array_locmem_mb

m_slk/slk_array_set [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_array_set

FUNCTION

  Elemental routine to set the PBLAS buffer to a constant value `cvalue`.
  Usually used to zero all the buffers in an array of matrix_scalapack objects.

SOURCE

1265 elemental subroutine slk_array_set(mat, cvalue)
1266 
1267 !Arguments ------------------------------------
1268  class(basemat_t),intent(inout) :: mat
1269  complex(dp),intent(in) :: cvalue
1270 
1271  select type (mat)
1272  class is (matrix_scalapack)
1273    if (allocated(mat%buffer_cplx)) mat%buffer_cplx = cvalue
1274    if (allocated(mat%buffer_real)) mat%buffer_real = real(cvalue, kind=dp)
1275  class is (slkmat_sp_t)
1276    if (allocated(mat%buffer_cplx)) mat%buffer_cplx = cmplx(cvalue, kind=sp)
1277    if (allocated(mat%buffer_real)) mat%buffer_real = real(cvalue, kind=sp)
1278  end select
1279 
1280 end subroutine slk_array_set
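
 Since the routine is elemental, a single call can zero a whole array of matrices
 (sketch; `mats` is a hypothetical 2d array):

    type(matrix_scalapack) :: mats(2, 3)
    call slk_array_set(mats, czero)   ! czero: complex zero constant from defs_basis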

m_slk/slk_bsize_and_type [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_bsize_and_type

FUNCTION

  Returns the byte size and the MPI datatype associated with the matrix elements
  stored in the ScaLAPACK matrix.

INPUTS

  Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer

OUTPUT

  bsize_elm=Byte size of the matrix element.
  mpi_type_elm=MPI datatype of the matrix element.

SOURCE

6497 subroutine slk_bsize_and_type(Slk_mat, bsize_elm, mpi_type_elm)
6498 
6499 !Arguments ------------------------------------
6500 !scalars
6501  class(matrix_scalapack),intent(in) :: Slk_mat
6502  integer,intent(out) :: bsize_elm,mpi_type_elm
6503 
6504 !Local variables ------------------------------
6505 !scalars
6506  integer :: ierr
6507  character(len=500) :: msg
6508 
6509 ! ************************************************************************
6510 
6511  ! @matrix_scalapack
6512  ierr=0
6513 #ifdef HAVE_MPI
6514  if (allocated(Slk_mat%buffer_cplx)) then
6515    ierr = ierr + 1
6516    mpi_type_elm = MPI_DOUBLE_COMPLEX
6517    bsize_elm    = xmpi_bsize_dpc
6518  end if
6519 
6520  if (allocated(Slk_mat%buffer_real)) then
6521    ierr = ierr + 1
6522    mpi_type_elm = MPI_DOUBLE_PRECISION
6523    bsize_elm    = xmpi_bsize_dp
6524  end if
6525 #endif
6526 
6527  ! One and only one buffer should be allocated.
6528  if (ierr /= 1) then
6529    write(msg,'(a,i0)')" ScaLAPACK buffers are not allocated correctly, ierr= ",ierr
6530    ABI_ERROR(msg)
6531  end if
6532 
6533 end subroutine slk_bsize_and_type

m_slk/slk_change_size_blocs [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_change_size_blocs

FUNCTION

  Change the block sizes, return new matrix in out_mat

INPUTS

  [free]: True if `in_mat` should be deallocated. Default: False

OUTPUT

SOURCE

4787 subroutine slk_change_size_blocs(in_mat, out_mat, &
4788                                  size_blocs, processor, free)  ! Optional
4789 
4790 !Arguments ------------------------------------
4791  class(basemat_t),target,intent(inout) :: in_mat
4792  class(basemat_t),intent(out) :: out_mat
4793  integer,optional,intent(in) :: size_blocs(2)
4794  class(processor_scalapack), target, optional,intent(in) :: processor
4795  logical,optional,intent(in) :: free
4796 
4797 !Local variables-------------------------------
4798  type(processor_scalapack), pointer :: processor__
4799 
4800 ! *************************************************************************
4801 
4802  processor__ => in_mat%processor; if (present(processor)) processor__ => processor
4803 
4804  if (present(size_blocs)) then
4805    call out_mat%init(in_mat%sizeb_global(1), in_mat%sizeb_global(2), processor__, in_mat%istwf_k, size_blocs=size_blocs)
4806  else
4807    call out_mat%init(in_mat%sizeb_global(1), in_mat%sizeb_global(2), processor__, in_mat%istwf_k)
4808  end if
4809  !call out_mat%print(header="output matrix generated by slk_change_size_blocs")
4810 
4811  ABI_CHECK(same_type_as(in_mat, out_mat), "in_mat and out_mat should have same type!")
4812 
4813  ! p?gemr2d: Copies a submatrix from one general rectangular matrix to another.
4814  ! prototype
4815  !call pzgemr2d(m, n, a, ia, ja, desca, b, ib, jb, descb, ictxt)
4816 
4817 #ifdef HAVE_LINALG_SCALAPACK
4818  select type (in_mat)
4819  class is (matrix_scalapack)
4820    select type (out_mat)
4821    class is (matrix_scalapack)
4822    if (allocated(in_mat%buffer_cplx)) then
4823      call pzgemr2d(in_mat%sizeb_global(1), in_mat%sizeb_global(2),  &
4824                    in_mat%buffer_cplx, 1, 1, in_mat%descript%tab,   &
4825                    out_mat%buffer_cplx, 1, 1, out_mat%descript%tab, &
4826                    processor__%grid%ictxt)
4827 
4828    else if (allocated(in_mat%buffer_real)) then
4829      call pdgemr2d(in_mat%sizeb_global(1), in_mat%sizeb_global(2),  &
4830                    in_mat%buffer_real, 1, 1, in_mat%descript%tab,   &
4831                    out_mat%buffer_real, 1, 1, out_mat%descript%tab, &
4832                    processor__%grid%ictxt)
4833    else
4834      ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!")
4835    end if
4836    end select
4837 
4838  class is (slkmat_sp_t)
4839    select type (out_mat)
4840    class is (slkmat_sp_t)
4841    if (allocated(in_mat%buffer_cplx)) then
4842      call pcgemr2d(in_mat%sizeb_global(1), in_mat%sizeb_global(2),  &
4843                    in_mat%buffer_cplx, 1, 1, in_mat%descript%tab,   &
4844                    out_mat%buffer_cplx, 1, 1, out_mat%descript%tab, &
4845                    processor__%grid%ictxt)
4846 
4847    else if (allocated(in_mat%buffer_real)) then
4848      call psgemr2d(in_mat%sizeb_global(1), in_mat%sizeb_global(2),  &
4849                    in_mat%buffer_real, 1, 1, in_mat%descript%tab,   &
4850                    out_mat%buffer_real, 1, 1, out_mat%descript%tab, &
4851                    processor__%grid%ictxt)
4852    else
4853      ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!")
4854    end if
4855    end select
4856 
4857  class default
4858    ABI_ERROR("Wrong class")
4859  end select
4860 #endif
4861 
4862  if (present(free)) then
4863    if (free) call in_mat%free()
4864  end if
4865 
4866 end subroutine slk_change_size_blocs
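
 A minimal sketch (hypothetical names): redistribute `amat` to 64x64 blocks and
 release the original storage in the same call:

    type(matrix_scalapack) :: amat, amat64
    call amat%change_size_blocs(amat64, size_blocs=[64, 64], free=.True.)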

m_slk/slk_collect_cplx [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_collect_cplx

FUNCTION

  Return on all processors the complex submatrix of shape (mm, nn) starting at position ija.
  NB: `out_carr` is allocated by the routine.
  If the optional argument `request` is used, the routine performs a non-blocking BCAST and the client code
  is supposed to wait on `request` before accessing out_carr.

SOURCE

5128 subroutine slk_collect_cplx(in_mat, mm, nn, ija, out_carr, request)
5129 
5130 !Arguments ------------------------------------
5131  class(matrix_scalapack),intent(in) :: in_mat
5132  integer,intent(in) :: mm, nn, ija(2)
5133  complex(dp) ABI_ASYNC, allocatable,intent(out) :: out_carr(:,:)
5134  integer ABI_ASYNC, optional,intent(out) :: request
5135 
5136 !Local variables-------------------------------
5137  integer,parameter :: master = 0
5138  integer :: ierr
5139  type(processor_scalapack) :: self_processor
5140  type(matrix_scalapack) :: out_mat
5141 
5142 ! *************************************************************************
5143 
5144  ABI_CHECK(allocated(in_mat%buffer_cplx), "buffer_cplx is not allocated")
5145 
5146  if (in_mat%processor%grid%nbprocs == 1) then
5147    ! Copy buffer and return
5148    ABI_MALLOC(out_carr, (mm, nn))
5149    out_carr(:,:) = in_mat%buffer_cplx(ija(1):ija(1)+mm-1, ija(2):ija(2)+nn-1); return
5150  end if
5151 
5152  ! Two-step algorithm:
5153  !     1) Use pzgemr2d to collect submatrix on master.
5154  !     2) Master broadcasts the submatrix.
5155 
5156  if (in_mat%processor%myproc == master) then
5157    call self_processor%init(xmpi_comm_self)
5158    call out_mat%init(mm, nn, self_processor, in_mat%istwf_k, size_blocs=[mm, nn])
5159  else
5160    out_mat%descript%tab(CTXT_) = -1
5161  end if
5162 
5163 #ifdef HAVE_LINALG_SCALAPACK
5164  call pzgemr2d(mm, nn,  &
5165                in_mat%buffer_cplx, ija(1), ija(2), in_mat%descript%tab,   &
5166                out_mat%buffer_cplx, 1, 1, out_mat%descript%tab, &
5167                in_mat%processor%grid%ictxt)
5168 #endif
5169 
5170  if (in_mat%processor%myproc == master) then
5171    ABI_MOVE_ALLOC(out_mat%buffer_cplx, out_carr)
5172    call out_mat%free()
5173    call self_processor%free()
5174  else
5175    ABI_MALLOC(out_carr, (mm, nn))
5176  end if
5177 
5178  if (present(request)) then
5179    call xmpi_ibcast(out_carr, master, in_mat%processor%comm, request, ierr)
5180  else
5181    call xmpi_bcast(out_carr, master, in_mat%processor%comm, ierr)
5182  end if
5183 
5184 end subroutine slk_collect_cplx
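
 Sketch of the non-blocking variant; `mm`, `nn` and `in_mat` are hypothetical and
 xmpi_wait is the wait wrapper provided by m_xmpi:

    complex(dp) ABI_ASYNC, allocatable :: submat(:,:)
    integer :: request, ierr
    ! Start collecting the (mm x nn) submatrix with top-left corner at (1, 1).
    call slk_collect_cplx(in_mat, mm, nn, [1, 1], submat, request=request)
    ! ... overlap independent work here ...
    call xmpi_wait(request, ierr)   ! submat may be accessed only after the wait.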

m_slk/slk_cut [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_cut

FUNCTION

  Extract submatrix of shape (glob_nrows, glob_ncols) starting at `ija` from `in_mat`
  and create new matrix with `size_blocs` and `processor`

INPUTS

  [free]: True if `in_mat` should be deallocated. Default: False

OUTPUT

SOURCE

4886 subroutine slk_cut(in_mat, glob_nrows, glob_ncols, out_mat, &
4887                    size_blocs, processor, ija, ijb, free)  ! Optional
4888 
4889 !Arguments ------------------------------------
4890  class(matrix_scalapack),target,intent(inout) :: in_mat
4891  integer,intent(in) :: glob_nrows, glob_ncols
4892  class(matrix_scalapack),intent(out) :: out_mat
4893  integer,optional,intent(in) :: size_blocs(2)
4894  class(processor_scalapack), target, optional,intent(in) :: processor
4895  integer,optional,intent(in) :: ija(2), ijb(2)
4896  logical,optional,intent(in) :: free
4897 
4898 !Local variables-------------------------------
4899  type(processor_scalapack), pointer :: processor__
4900  integer :: ija__(2), ijb__(2)
4901 
4902 ! *************************************************************************
4903 
4904  ija__ = [1, 1]; if (present(ija)) ija__ = ija
4905  ijb__ = [1, 1]; if (present(ijb)) ijb__ = ijb
4906 
4907  processor__ => in_mat%processor; if (present(processor)) processor__ => processor
4908 
4909  if (present(size_blocs)) then
4910    call out_mat%init(glob_nrows, glob_ncols, processor__, in_mat%istwf_k, size_blocs=size_blocs)
4911  else
4912    call out_mat%init(glob_nrows, glob_ncols, processor__, in_mat%istwf_k)
4913  end if
4914  !call out_mat%print(header="output matrix generated by slk_cut")
4915 
4916  ! p?gemr2d: Copies a submatrix from one general rectangular matrix to another.
4917  ! prototype
4918  !call pzgemr2d(m, n, a, ia, ja, desca, b, ib, jb, descb, ictxt)
4919 
4920  if (allocated(in_mat%buffer_cplx)) then
4921 #ifdef HAVE_LINALG_SCALAPACK
4922    call pzgemr2d(glob_nrows, glob_ncols, &
4923                  in_mat%buffer_cplx, ija__(1), ija__(2), in_mat%descript%tab,   &
4924                  out_mat%buffer_cplx, ijb__(1), ijb__(2), out_mat%descript%tab, &
4925                  processor__%grid%ictxt)
4926 
4927  else if (allocated(in_mat%buffer_real)) then
4928    call pdgemr2d(glob_nrows, glob_ncols, &
4929                  in_mat%buffer_real, ija__(1), ija__(2), in_mat%descript%tab,   &
4930                  out_mat%buffer_real, ijb__(1), ijb__(2), out_mat%descript%tab, &
4931                  processor__%grid%ictxt)
4932 #endif
4933  else
4934    ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!")
4935  end if
4936 
4937  if (present(free)) then
4938    if (free) call in_mat%free()
4939  end if
4940 
4941 end subroutine slk_cut

m_slk/slk_get_head_and_wings [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_get_head_and_wings

FUNCTION

  Return global arrays with head and wings of the matrix.
  If call_mpi is False, the global MPI sum is postponed.
  Useful to reduce the number of MPI calls if one has to operate on multiple matrices.

SOURCE

873 subroutine slk_get_head_and_wings(mat, head, low_wing, up_wing, call_mpi)
874 
875 !Arguments ------------------------------------
876  class(matrix_scalapack),intent(in) :: mat
877  complex(dp),intent(out) :: head, low_wing(mat%sizeb_global(1)), up_wing(mat%sizeb_global(2))
878  logical,intent(in) :: call_mpi
879 
880 !Local variables-------------------------------
881  integer :: ierr, il_g1, il_g2, iglob1, iglob2
882  logical :: is_cplx
883 
884 ! *********************************************************************
885 
886  head = zero; low_wing = zero; up_wing = zero
887 
888  is_cplx = allocated(mat%buffer_cplx)
889 
890  do il_g2=1,mat%sizeb_local(2)
891    iglob2 = mat%loc2gcol(il_g2)
892    do il_g1=1,mat%sizeb_local(1)
893      iglob1 = mat%loc2grow(il_g1)
894 
895      if (iglob1 == 1 .or. iglob2 == 1) then
896        if (iglob1 == 1 .and. iglob2 == 1) then
897          if (is_cplx) then
898            head = mat%buffer_cplx(il_g1, il_g2)
899          else
900            head = mat%buffer_real(il_g1, il_g2)
901          end if
902        else if (iglob1 == 1) then
903          if (is_cplx) then
904            up_wing(iglob2) = mat%buffer_cplx(il_g1, il_g2)
905          else
906            up_wing(iglob2) = mat%buffer_real(il_g1, il_g2)
907          end if
908        else if (iglob2 == 1) then
909          if (is_cplx) then
910            low_wing(iglob1) = mat%buffer_cplx(il_g1, il_g2)
911          else
912            low_wing(iglob1) = mat%buffer_real(il_g1, il_g2)
913          end if
914        end if
915      end if
916 
917    end do
918  end do
919 
920  if (call_mpi) then
921    call xmpi_sum(head, mat%processor%grid%ictxt, ierr)
922    call xmpi_sum(low_wing, mat%processor%grid%ictxt, ierr)
923    call xmpi_sum(up_wing, mat%processor%grid%ictxt, ierr)
924  end if
925 
926 end subroutine slk_get_head_and_wings

m_slk/slk_get_trace [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_get_trace

FUNCTION

  Compute the trace of an N-by-N distributed matrix.

INPUTS

OUTPUT

SOURCE

5275 complex(dp) function slk_get_trace(mat) result(ctrace)
5276 
5277 !Arguments ------------------------------------
5278  class(basemat_t), intent(in) :: mat
5279 
5280 !Local variables-------------------------------
5281 #ifdef HAVE_LINALG_SCALAPACK
5282  real(dp) :: rtrace
5283  real(sp) :: rtrace_sp
5284  complex(sp) :: ctrace_sp
5285  real(dp),external :: PDLATRA
5286  real(sp),external :: PSLATRA
5287  complex(sp),external :: PCLATRA
5288  complex(dp),external :: PZLATRA
5289 #endif
5290 
5291 ! *************************************************************************
5292 
5293  ABI_CHECK_IEQ(mat%sizeb_global(1), mat%sizeb_global(2), "get_trace assumes square matrix!")
5294 
5295  ! prototype for complex version.
5296  ! COMPLEX*16 FUNCTION PZLATRA( N, A, IA, JA, DESCA )
5297 #ifdef HAVE_LINALG_SCALAPACK
5298  select type (mat)
5299  class is (matrix_scalapack)
5300    if (allocated(mat%buffer_cplx)) then
5301      ctrace = PZLATRA(mat%sizeb_global(1), mat%buffer_cplx, 1, 1, mat%descript%tab)
5302    else if (allocated(mat%buffer_real)) then
5303      rtrace = PDLATRA(mat%sizeb_global(1), mat%buffer_real, 1, 1, mat%descript%tab)
5304      ctrace = rtrace
5305    else
5306      ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!")
5307    end if
5308  class is (slkmat_sp_t)
5309     if (allocated(mat%buffer_cplx)) then
5310       ctrace_sp = PCLATRA(mat%sizeb_global(1), mat%buffer_cplx, 1, 1, mat%descript%tab)
5311       ctrace = ctrace_sp
5312     else if (allocated(mat%buffer_real)) then
5313       rtrace_sp = PSLATRA(mat%sizeb_global(1), mat%buffer_real, 1, 1, mat%descript%tab)
5314       ctrace = rtrace_sp
5315     else
5316       ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!")
5317     end if
5318  class default
5319    ABI_ERROR("Wrong class")
5320  end select
5321 #endif
5322 
5323 end function slk_get_trace

m_slk/slk_glob2loc [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_glob2loc

FUNCTION

  Determine the local indices of an element from its global indices and return haveit bool flag.

INPUTS

  iglob= global row index.
  jglob= global column index.

OUTPUT

  iloc= local row index.
  jloc= local column index.
  haveit= True if (iglob, jglob) is stored on this proc

SOURCE

1597 subroutine slk_glob2loc(mat, iglob, jglob, iloc, jloc, haveit)
1598 
1599 !Arguments ------------------------------------
1600  class(basemat_t),intent(in) :: mat
1601  integer, intent(in) :: iglob, jglob
1602  integer, intent(out) :: iloc, jloc
1603  logical,intent(out) :: haveit
1604 
1605 !Local variables-------------------------------
1606  integer :: row_src, col_src
1607 
1608 ! *********************************************************************
1609 
1610 #ifdef HAVE_LINALG_SCALAPACK
1611  ! SUBROUTINE INFOG2L( GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
1612 
1613  call INFOG2L(iglob, jglob, mat%descript%tab, mat%processor%grid%dims(1), mat%processor%grid%dims(2), &
1614    mat%processor%coords(1), mat%processor%coords(2), iloc, jloc, row_src, col_src)
1615 
1616  haveit = all(mat%processor%coords == [row_src, col_src])
1617 #endif
1618 
1619 end subroutine slk_glob2loc
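
 Typical ownership pattern (sketch; `cval` is a hypothetical value): update the
 global element (iglob, jglob) only on the process that stores it:

    integer :: iloc, jloc
    logical :: haveit
    call mat%glob2loc(iglob, jglob, iloc, jloc, haveit)
    if (haveit) mat%buffer_cplx(iloc, jloc) = cval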

m_slk/slk_has_elpa [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_has_elpa

FUNCTION

  Return True if ELPA support is activated

SOURCE

1364 pure logical function slk_has_elpa() result (ans)
1365 
1366 ! *********************************************************************
1367 
1368  ans = .False.
1369 #ifdef HAVE_LINALG_ELPA
1370  ans = .True.
1371 #endif
1372 
1373 end function slk_has_elpa

m_slk/slk_heev [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

 slk_heev

FUNCTION

  slk_heev computes selected eigenvalues and, optionally, eigenvectors of an Hermitian matrix A.
   A * X = lambda * X

INPUTS

  JOBZ    (global input) CHARACTER*1
          Specifies whether or not to compute the eigenvectors:
          = "N":  Compute eigenvalues only.
          = "V":  Compute eigenvalues and eigenvectors.
  UPLO    (global input) CHARACTER*1
          Specifies whether the upper or lower triangular part of the symmetric matrix A is stored:
          = "U":  Upper triangular
          = "L":  Lower triangular

  mat=The object storing the local buffer in DOUBLE PRECISION, the array descriptor, the PBLAS context.
  vec=The distributed eigenvectors. Not referenced if JOBZ="N"

OUTPUT

  W       (global output) array, dimension (N) where N is the rank of the global matrix.
          On normal exit, the first M entries contain the selected eigenvalues in ascending order.

SIDE EFFECTS

  If JOBZ="V", the local buffer vec%buffer_cplx will contain part of the distributed eigenvectors.
  On exit, the lower triangle (if UPLO='L') or the upper triangle (if UPLO='U') of A, including the diagonal, is destroyed.

SOURCE

3527 subroutine slk_heev(mat, jobz, uplo, vec, w, &
3528                       mat_size, ija, ijz) ! Optional
3529 
3530 !Arguments ------------------------------------
3531 !scalars
3532  class(matrix_scalapack),intent(inout) :: mat
3533  character(len=*),intent(in) :: jobz, uplo
3534  class(matrix_scalapack),intent(inout) :: vec
3535 !arrays
3536  real(dp),intent(out) :: w(:)
3537  integer,optional,intent(in) :: mat_size, ija(2), ijz(2)
3538 
3539 #ifdef HAVE_LINALG_SCALAPACK
3540 !Local variables ------------------------------
3541 !scalars
3542  integer :: lwork, lrwork, info, nn
3543  !character(len=500) :: msg
3544 !arrays
3545  integer :: ija__(2), ijz__(2)
3546  real(dp),allocatable :: rwork_dp(:)
3547  complex(dp),allocatable :: work_dp(:)
3548 
3549 !************************************************************************
3550 
3551  ABI_CHECK(allocated(mat%buffer_cplx), "buffer_cplx not allocated")
3552 
3553  nn = mat%sizeb_global(2); if (present(mat_size)) nn = mat_size
3554  ija__ = [1, 1]; if (present(ija)) ija__ = ija
3555  ijz__ = [1, 1]; if (present(ijz)) ijz__ = ijz
3556 
3557  ! Get optimal size of workspace.
3558  lwork = - 1; lrwork = -1
3559  ABI_MALLOC(work_dp, (1))
3560  ABI_MALLOC(rwork_dp, (1))
3561 
3562  !call pzheev(jobz, uplo, n, a, ia, ja, desca, w, z, iz, jz, descz, work, lwork, rwork, lrwork, info)
3563 
3564  call PZHEEV(jobz, uplo, nn, mat%buffer_cplx, ija__(1), ija__(2), mat%descript%tab, &
3565              w, vec%buffer_cplx, ijz__(1), ijz__(2), vec%descript%tab, work_dp, lwork, rwork_dp, lrwork, info)
3566  ABI_CHECK(info == 0, sjoin("Error in the calculation of the workspace size, info:", itoa(info)))
3567 
3568  lwork = NINT(real(work_dp(1))); lrwork= NINT(rwork_dp(1)) !*2
3569  ABI_FREE(work_dp)
3570  ABI_FREE(rwork_dp)
3571 
3572  ! MG: Nov 23 2011. On my mac with the official scalapack package, rwork(1) is not large enough and causes a SIGFAULT.
3573  if (firstchar(jobz, ['V'])) then
3574    if (lrwork < 2*nn + 2*nn-2) lrwork = 2*nn + 2*nn-2
3575  else if (firstchar(jobz, ['N'])) then
3576    if (lrwork < 2*nn) lrwork = 2*nn
3577  end if
3578  !write(std_out,*)lwork,lrwork
3579 
3580  ! Solve the problem.
3581  ABI_MALLOC(work_dp, (lwork))
3582  ABI_MALLOC(rwork_dp, (lrwork))
3583 
3584  call PZHEEV(jobz, uplo, nn, mat%buffer_cplx, ija__(1), ija__(2), mat%descript%tab, &
3585              w, vec%buffer_cplx, ijz__(1), ijz__(2), vec%descript%tab, work_dp, lwork, rwork_dp, lrwork, info)
3586  ABI_CHECK(info == 0, sjoin("PZHEEV returned info:", itoa(info)))
3587  ABI_FREE(work_dp)
3588  ABI_FREE(rwork_dp)
3589 #endif
3590 
3591 end subroutine slk_heev
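
 A minimal sketch with hypothetical names: compute all eigenvalues and eigenvectors
 of a distributed Hermitian matrix `hmat`:

    type(matrix_scalapack) :: hmat, eigvec
    real(dp),allocatable :: w(:)
    integer :: nn
    nn = hmat%sizeb_global(1)
    call eigvec%init(nn, nn, hmat%processor, hmat%istwf_k)
    ABI_MALLOC(w, (nn))
    call slk_heev(hmat, "V", "U", eigvec, w)   ! the "U" triangle of hmat is destroyed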

m_slk/slk_hpd_invert [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

 slk_hpd_invert

FUNCTION

  Compute the inverse of an Hermitian positive definite matrix.

INPUTS

  uplo: global input
    = 'U':  Upper triangle of sub( A ) is stored;
    = 'L':  Lower triangle of sub( A ) is stored.
  [full]: True if the full PBLAS matrix is needed. Default: True

SIDE EFFECTS

  mat= The object storing the local buffer, the array descriptor, the context, etc.
    On entry, this array contains the local pieces of the N-by-N Hermitian distributed matrix sub( A ) to be factored.
    If UPLO = 'U', the leading N-by-N upper triangular part of sub( A ) contains the upper triangular part of the matrix,
    and its strictly lower triangular part is not referenced.
    If UPLO = 'L', the leading N-by-N lower triangular part of sub( A ) contains the lower triangular part of the
    distributed matrix, and its strictly upper triangular part is not referenced.
    On exit, the local pieces of the upper or lower triangle of the (Hermitian) inverse of sub( A )

SOURCE

4387 subroutine slk_hpd_invert(mat, uplo, full)
4388 
4389 !Arguments ------------------------------------
4390  character(len=*),intent(in) :: uplo
4391  class(matrix_scalapack),intent(inout) :: mat
4392  logical,optional,intent(in) :: full
4393 
4394 #ifdef HAVE_LINALG_SCALAPACK
4395 !Local variables ------------------------------
4396 !scalars
4397  integer :: info, mm, il1, il2, iglob1, iglob2
4398  type(matrix_scalapack) :: work_mat
4399  logical :: full__
4400 
4401 !************************************************************************
4402 
4403  ABI_CHECK(allocated(mat%buffer_cplx), "buffer_cplx not allocated")
4404 
4405  ! PZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite matrix:
4406  !  A = U**H * U,   if UPLO = 'U', or
4407  !  A = L  * L**H,  if UPLO = 'L',
4408  mm = mat%sizeb_global(1)
4409  call PZPOTRF(uplo, mm, mat%buffer_cplx, 1, 1, mat%descript%tab, info)
4410  ABI_CHECK(info == 0, sjoin("PZPOTRF returned info:", itoa(info)))
4411 
4412  ! PZPOTRI computes the inverse of a complex Hermitian positive definite
4413  ! distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the
4414  ! Cholesky factorization sub( A ) = U**H*U or L*L**H computed by PZPOTRF.
4415  call PZPOTRI(uplo, mm, mat%buffer_cplx, 1, 1, mat%descript%tab, info)
4416  ABI_CHECK(info == 0, sjoin("PZPOTRI returned info:", itoa(info)))
4417 
4418  full__ = .True.; if (present(full)) full__ = full
4419  if (full__) then
4420    ! Only the uplo part contains the inverse so we need to fill the other triangular part.
4421    !     1)  Fill the missing triangle with zeros and copy results to work_mat
4422    !     2)  Call pzgeadd to compute: sub(C) := beta*sub(C) + alpha*op(sub(A))
4423    !     3)  Divide diagonal elements by two.
4424 
4425    do il2=1,mat%sizeb_local(2)
4426      iglob2 = mat%loc2gcol(il2)
4427      do il1=1,mat%sizeb_local(1)
4428        iglob1 = mat%loc2grow(il1)
4429        if (uplo == "L" .and. iglob2 > iglob1) mat%buffer_cplx(il1, il2) = zero
4430        if (uplo == "U" .and. iglob2 < iglob1) mat%buffer_cplx(il1, il2) = zero
4431      end do
4432    end do
4433 
4434    call mat%copy(work_mat, empty=.False.)
4435 
4436    ! call pzgeadd(trans, m, n, alpha, a, ia, ja, desca, beta, c, ic, jc, descc)
4437    ! sub(C) := beta*sub(C) + alpha*op(sub(A))
4438    call pzgeadd("C", mm, mm, cone, work_mat%buffer_cplx, 1, 1, work_mat%descript%tab, &
4439          cone, mat%buffer_cplx, 1, 1, mat%descript%tab)
4440    call work_mat%free()
4441 
4442    do il2=1,mat%sizeb_local(2)
4443      iglob2 = mat%loc2gcol(il2)
4444      do il1=1,mat%sizeb_local(1)
4445        iglob1 = mat%loc2grow(il1)
4446        if (iglob2 == iglob1) mat%buffer_cplx(il1, il2) = half * mat%buffer_cplx(il1, il2)
4447      end do
4448    end do
4449  end if ! full__
4450 #endif
4451 
4452 end subroutine slk_hpd_invert

m_slk/slk_invert [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

 slk_invert

FUNCTION

  Compute the inverse of a complex matrix.

SIDE EFFECTS

 mat
    In input, the matrix to invert.
    In output the matrix inverted and distributed among the nodes.

SOURCE

4265 subroutine slk_invert(mat)
4266 
4267 !Arguments ------------------------------------
4268  class(basemat_t),intent(inout) :: mat
4269 
4270 #ifdef HAVE_LINALG_SCALAPACK
4271 !Local variables ------------------------------
4272 !scalars
4273  integer :: lwork,info,ipiv_size,liwork
4274 !array
4275  integer,allocatable :: ipiv(:), iwork(:)
4276  complex(dp),allocatable :: work_dp(:)
4277  complex(sp),allocatable :: work_sp(:)
4278 
4279 !************************************************************************
4280 
4281  ! IMPORTANT NOTE: PZGETRF requires square block decomposition i.e., MB_A = NB_A.
4282  if (mat%descript%tab(MB_) /= mat%descript%tab(NB_)) then
4283    ABI_ERROR(" PZGETRF requires square block decomposition i.e MB_A = NB_A.")
4284  end if
4285 
4286  ipiv_size = my_locr(mat) + mat%descript%tab(MB_)
4287  ABI_MALLOC(ipiv, (ipiv_size))
4288 
4289  select type (mat)
4290  class is (matrix_scalapack)
4291    if (allocated(mat%buffer_cplx)) then
4292      ! P * L * U  Factorization.
4293      call PZGETRF(mat%sizeb_global(1), mat%sizeb_global(2), mat%buffer_cplx, 1, 1, mat%descript%tab,ipiv, info)
4294      ABI_CHECK(info == 0, sjoin(" PZGETRF returned info:", itoa(info)))
4295 
4296      ! Get optimal size of workspace for PZGETRI.
4297      lwork = -1; liwork = -1
4298      ABI_MALLOC(work_dp,(1))
4299      ABI_MALLOC(iwork,(1))
4300 
4301      call PZGETRI(mat%sizeb_global(1), mat%buffer_cplx, 1, 1, mat%descript%tab, ipiv, work_dp, lwork, iwork, liwork, info)
4302      ABI_CHECK(info == 0, "PZGETRI: Error while computing workspace size")
4303 
4304      lwork = nint(real(work_dp(1))); liwork=iwork(1)
4305      ABI_FREE(work_dp)
4306      ABI_FREE(iwork)
4307 
4308      ! Solve the problem.
4309      ABI_MALLOC(work_dp, (lwork))
4310      ABI_MALLOC(iwork, (liwork))
4311 
4312      call PZGETRI(mat%sizeb_global(1), mat%buffer_cplx, 1, 1, mat%descript%tab, ipiv, work_dp, lwork, iwork, liwork, info)
4313      ABI_CHECK(info == 0, sjoin("PZGETRI returned info:", itoa(info)))
4314      ABI_FREE(work_dp)
4315 
4316    else if (allocated(mat%buffer_real)) then
4317      ABI_ERROR("Inversion for real matrices not coded!")
4318    end if
4319 
4320  class is (slkmat_sp_t)
4321    if (allocated(mat%buffer_cplx)) then
4322      ! P * L * U  Factorization.
4323      call PCGETRF(mat%sizeb_global(1), mat%sizeb_global(2), mat%buffer_cplx, 1, 1, mat%descript%tab,ipiv, info)
4324      ABI_CHECK(info == 0, sjoin(" PCGETRF returned info:", itoa(info)))
4325 
4326      ! Get optimal size of workspace for PCGETRI.
4327      lwork = -1; liwork = -1
4328      ABI_MALLOC(work_sp,(1))
4329      ABI_MALLOC(iwork,(1))
4330 
4331      call PCGETRI(mat%sizeb_global(1), mat%buffer_cplx, 1, 1, mat%descript%tab, ipiv, work_sp, lwork, iwork, liwork, info)
4332      ABI_CHECK(info == 0, "PCGETRI: Error while computing workspace size")
4333 
4334      lwork = nint(real(work_sp(1))); liwork=iwork(1)
4335      ABI_FREE(work_sp)
4336      ABI_FREE(iwork)
4337 
4338      ! Solve the problem.
4339      ABI_MALLOC(work_sp, (lwork))
4340      ABI_MALLOC(iwork, (liwork))
4341 
4342      call PCGETRI(mat%sizeb_global(1), mat%buffer_cplx, 1, 1, mat%descript%tab, ipiv, work_sp, lwork, iwork, liwork, info)
4343      ABI_CHECK(info == 0, sjoin("PCGETRI returned info:", itoa(info)))
4344      ABI_FREE(work_sp)
4345 
4346    else if (allocated(mat%buffer_real)) then
4347      ABI_ERROR("Inversion for real matrices not coded!")
4348    end if
4349 
4350  class default
4351    ABI_ERROR("Wrong class")
4352  end select
4353 
4354  ABI_FREE(iwork)
4355  ABI_FREE(ipiv)
4356 #endif
4357 
4358 end subroutine slk_invert

m_slk/slk_matrix_from_global_dpc_1Dp [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_matrix_from_global_dpc_1Dp

FUNCTION

  Routine to fill a complex SCALAPACK matrix with respect to a global matrix.
  target: double precision complex matrix in packed form.

INPUTS

  glob_pmat(n*(n+1)/2)=One-dimensional array containing the global matrix A packed columnwise in a linear array.
    The j-th column of A is stored in the array glob_pmat as follows:
      if uplo = "U", glob_pmat(i + (j-1)*j/2)       = A(i,j) for 1<=i<=j;
      if uplo = "L", glob_pmat(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
      where n is the number of rows or columns in the global matrix.
  uplo=String specifying whether only the upper or lower triangular part of the global matrix is used:
    = "U":  Upper triangular
    = "L":  Lower triangular

SIDE EFFECTS

  mat<matrix_scalapack>=The distributed matrix.
    %buffer_cplx=Local buffer containing the values this node is dealing with.

SOURCE

2244 subroutine slk_matrix_from_global_dpc_1Dp(mat,uplo,glob_pmat)
2245 
2246 !Arguments ------------------------------------
2247 !scalars
2248  class(matrix_scalapack),intent(inout)  :: mat
2249  character(len=*),intent(in) :: uplo
2250 !array
2251  complex(dpc),intent(in) :: glob_pmat(:)
2252 
2253 !Local variables-------------------------------
2254  integer :: ii,jj,iglob,jglob,ind,n
2255  real(dp) :: szm
2256 
2257 !************************************************************************
2258 
2259  ABI_CHECK(allocated(mat%buffer_cplx), "%buffer_cplx not allocated")
2260 
2261  szm = SIZE(glob_pmat)
2262  n = NINT( (-1 + SQRT(one+8*szm) )*half )
2263  if (n*(n+1)/2 /= SIZE(glob_pmat)) then
2264    ABI_ERROR("The size of glob_pmat is not of the form n*(n+1)/2")
2265  end if
2266 
2267  select case (uplo(1:1))
2268 
2269  case ("U", "u")
2270    ! Only the upper triangle of the global matrix is used.
2271    do jj=1,mat%sizeb_local(2)
2272      do ii=1,mat%sizeb_local(1)
2273        call mat%loc2glob(ii, jj, iglob, jglob)
2274 
2275        if (jglob>=iglob) then
2276          ind = iglob + jglob*(jglob-1)/2
2277          mat%buffer_cplx(ii,jj) =        glob_pmat(ind)
2278        else
2279          ind = jglob + iglob*(iglob-1)/2
2280          mat%buffer_cplx(ii,jj) = DCONJG( glob_pmat(ind) )
2281        end if
2282 
2283      end do
2284    end do
2285 
2286  case ("L", "l")
2287    ! Only the lower triangle of the global matrix is used.
2288    do jj=1,mat%sizeb_local(2)
2289      do ii=1,mat%sizeb_local(1)
2290        call mat%loc2glob(ii, jj, iglob, jglob)
2291 
2292        if (jglob<=iglob) then
2293          ind = iglob + (jglob-1)*(2*n-jglob)/2
2294          mat%buffer_cplx(ii,jj) =        glob_pmat(ind)
2295        else
2296          ind = jglob + (iglob-1)*(2*n-iglob)/2
2297          mat%buffer_cplx(ii,jj) = DCONJG( glob_pmat(ind) )
2298        end if
2299 
2300      end do
2301    end do
2302 
2303  case default
2304    ABI_BUG(" Wrong uplo: "//TRIM(uplo))
2305  end select
2306 
2307 end subroutine slk_matrix_from_global_dpc_1Dp

m_slk/slk_matrix_from_global_dpc_2D [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_matrix_from_global_dpc_2D

FUNCTION

  Routine to fill a complex SCALAPACK matrix with respect to a global matrix.
  target: Two-dimensional double precision complex matrix

INPUTS

  glob_mat=Two-dimensional array containing the global matrix.
  uplo=String specifying whether only the upper or lower triangular part of the global matrix is used:
    = "U":  Upper triangular
    = "L":  Lower triangular
    = "A":  Full matrix (used for general complex matrices)

SIDE EFFECTS

  mat<matrix_scalapack>=The distributed matrix.
    %buffer_cplx=Local buffer containing the values this node is dealing with.

SOURCE

2158 subroutine slk_matrix_from_global_dpc_2D(mat, uplo, glob_mat)
2159 
2160 !Arguments ------------------------------------
2161 !scalars
2162  class(matrix_scalapack),intent(inout)  :: mat
2163  character(len=*),intent(in) :: uplo
2164 !array
2165  complex(dpc),intent(in) :: glob_mat(:,:)
2166 
2167 !Local variables-------------------------------
2168  integer :: ii, jj, iglob, jglob
2169 
2170 !************************************************************************
2171 
2172  ABI_CHECK(allocated(mat%buffer_cplx), "%buffer_cplx not allocated")
2173 
2174  select case (uplo(1:1))
2175 
2176  case ("A", "a")
2177    ! Full global matrix is used.
2178    do jj=1,mat%sizeb_local(2)
2179      do ii=1,mat%sizeb_local(1)
2180        call mat%loc2glob(ii, jj, iglob, jglob)
2181        mat%buffer_cplx(ii,jj) = glob_mat(iglob,jglob)
2182      end do
2183    end do
2184 
2185  case ("U", "u")
2186    ! Only the upper triangle of the global matrix is used.
2187    do jj=1,mat%sizeb_local(2)
2188      do ii=1,mat%sizeb_local(1)
2189        call mat%loc2glob(ii, jj, iglob, jglob)
2190        if (jglob>=iglob) then
2191          mat%buffer_cplx(ii,jj) =        glob_mat(iglob,jglob)
2192        else
2193          mat%buffer_cplx(ii,jj) = DCONJG(glob_mat(jglob,iglob))
2194        end if
2195      end do
2196    end do
2197 
2198  case ("L", "l")
2199    ! Only the lower triangle of the global matrix is used.
2200    do jj=1,mat%sizeb_local(2)
2201      do ii=1,mat%sizeb_local(1)
2202        call mat%loc2glob(ii, jj, iglob, jglob)
2203        if (jglob<=iglob) then
2204          mat%buffer_cplx(ii,jj) =        glob_mat(iglob,jglob)
2205        else
2206          mat%buffer_cplx(ii,jj) = DCONJG(glob_mat(jglob,iglob))
2207        end if
2208      end do
2209    end do
2210 
2211  case default
2212    ABI_BUG(" Wrong uplo: "//TRIM(uplo))
2213  end select
2214 
2215 end subroutine slk_matrix_from_global_dpc_2D

m_slk/slk_matrix_loc2gcol [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_matrix_loc2gcol

FUNCTION

  Determine the global column index of an element from the local index

INPUTS

  matrix= the matrix to process.
  jloc= local column index.

SOURCE

1696 integer pure function slk_matrix_loc2gcol(matrix, jloc) result(jglob)
1697 
1698 !Arguments ------------------------------------
1699  class(basemat_t),intent(in) :: matrix
1700  integer, intent(in) :: jloc
1701 
1702 ! *********************************************************************
1703 
1704  jglob = loc_glob__(matrix, matrix%processor, jloc, 2)
1705 
1706 end function slk_matrix_loc2gcol

m_slk/slk_matrix_loc2glob [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_matrix_loc2glob

FUNCTION

  Determine the global indices of an element from its local indices.

INPUTS

  matrix= the matrix to process.
  iloc= local row index.
  jloc= local column index.

OUTPUT

  i= row in the matrix
  j= column in the matrix

SOURCE

1642 pure subroutine slk_matrix_loc2glob(matrix, iloc, jloc, i, j)
1643 
1644 !Arguments ------------------------------------
1645  class(basemat_t),intent(in) :: matrix
1646  integer, intent(in) :: iloc,jloc
1647  integer, intent(out) :: i,j
1648 
1649 ! *********************************************************************
1650 
1651  i = loc_glob__(matrix, matrix%processor, iloc, 1)
1652  j = loc_glob__(matrix, matrix%processor, jloc, 2)
1653 
1654 end subroutine slk_matrix_loc2glob

m_slk/slk_matrix_loc2grow [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_matrix_loc2grow

FUNCTION

  Determine the global row index from the local index

INPUTS

  matrix= the matrix to process.
  iloc= local row index.

SOURCE

1670 integer pure function slk_matrix_loc2grow(matrix, iloc) result(iglob)
1671 
1672 !Arguments ------------------------------------
1673  class(basemat_t),intent(in) :: matrix
1674  integer, intent(in) :: iloc
1675 
1676 ! *********************************************************************
1677 
1678  iglob = loc_glob__(matrix, matrix%processor, iloc, 1)
1679 
1680 end function slk_matrix_loc2grow

m_slk/slk_matrix_to_global_dpc_2D [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_matrix_to_global_dpc_2D

FUNCTION

  Fill a global matrix with respect to a SCALAPACK matrix.
  target: Two-dimensional double precision complex matrix.

INPUTS

  Slk_mat<matrix_scalapack>=The distributed matrix.
  uplo=String specifying whether the upper or lower triangular part of the global matrix has to be filled:
    = "U":  Upper triangular
    = "L":  Lower triangular
    = "A":  Full matrix is filled (used for general complex matrices)

SIDE EFFECTS

  glob_mat=The global matrix whose entries owned by this processor are overwritten.
  Note that the remaining entries, not treated by this node, are left unchanged.

SOURCE

2333 subroutine slk_matrix_to_global_dpc_2D(mat, uplo, glob_mat)
2334 
2335 !Arguments ------------------------------------
2336 !scalaras
2337  class(matrix_scalapack),intent(in) :: mat
2338  character(len=*),intent(in) :: uplo
2339 !arrays
2340  complex(dpc),intent(inout) :: glob_mat(:,:)
2341 
2342 !Local variables-------------------------------
2343  integer :: ii,jj,iglob,jglob
2344 
2345 !************************************************************************
2346 
2347  select case (uplo(1:1))
2348 
2349  case ("A", "a")
2350    ! Full global matrix has to be filled.
2351    do jj=1,mat%sizeb_local(2)
2352      do ii=1,mat%sizeb_local(1)
2353        call mat%loc2glob(ii, jj, iglob, jglob)
2354        glob_mat(iglob,jglob) = mat%buffer_cplx(ii,jj)
2355      end do
2356    end do
2357 
2358  case ("U", "u")
2359    ! Only the upper triangle of the global matrix is filled.
2360    do jj=1,mat%sizeb_local(2)
2361      do ii=1,mat%sizeb_local(1)
2362        call mat%loc2glob(ii, jj, iglob, jglob)
2363        if (jglob>=iglob) glob_mat(iglob,jglob) = mat%buffer_cplx(ii,jj)
2364      end do
2365    end do
2366 
2367  case ("L", "l")
2368    ! Only the lower triangle of the global matrix is filled.
2369    do jj=1,mat%sizeb_local(2)
2370      do ii=1,mat%sizeb_local(1)
2371        call mat%loc2glob(ii, jj, iglob, jglob)
2372        if (jglob<=iglob) glob_mat(iglob,jglob) = mat%buffer_cplx(ii,jj)
2373      end do
2374    end do
2375 
2376  case default
2377    ABI_BUG(" Wrong uplo: "//TRIM(uplo))
2378  end select
2379 
2380 end subroutine slk_matrix_to_global_dpc_2D
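
 Typical gather pattern (sketch, hypothetical names): since each process only
 overwrites the entries it owns, zero the global array first and MPI-sum it
 afterwards to assemble the full matrix on every rank:

    complex(dpc),allocatable :: glob_mat(:,:)
    integer :: nn, ierr
    nn = mat%sizeb_global(1)
    ABI_MALLOC(glob_mat, (nn, mat%sizeb_global(2)))
    glob_mat = czero
    call slk_matrix_to_global_dpc_2D(mat, "A", glob_mat)
    call xmpi_sum(glob_mat, mat%processor%comm, ierr)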

m_slk/slk_pgemm_dp [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_pgemm_dp

FUNCTION

  Extended matrix * matrix product: C := alpha*A*B + beta*C
  For a simple matrix vector product, one can simply pass alpha = cone and beta = czero

INPUTS

  matrix1= first ScaLAPACK matrix (matrix A)
  matrix2= second ScaLAPACK matrix (matrix B)
  alpha= scalar multiplier for the A*B product
  beta= scalar multiplier for the C matrix
  [ija, ijb, ijc]:  (global). The row and column indices in the distributed matrix A/B/C
    indicating the first row and the first column of the submatrix, respectively. Default [1, 1]

OUTPUT

  results= ScaLAPACK matrix coming out of the operation

NOTES

 The ESSL manual says that
      "matrices matrix1 and matrix2 must have no common elements otherwise, results are unpredictable."
 However the official scaLAPACK documentation does not report this (severe) limitation.

SOURCE

2513 subroutine slk_pgemm_dp(transa, transb, matrix1, alpha, matrix2, beta, results, &
2514                         ija, ijb, ijc) ! optional
2515 
2516 !Arguments ------------------------------------
2517  character(len=1),intent(in) :: transa, transb
2518  class(matrix_scalapack),intent(in) :: matrix1, matrix2
2519  class(matrix_scalapack),intent(inout) :: results
2520  complex(dpc),intent(in) :: alpha, beta
2521  integer,optional,intent(in) :: ija(2), ijb(2), ijc(2)
2522 
2523 !Local variables-------------------------------
2524  integer :: mm, nn, kk, ija__(2), ijb__(2), ijc__(2)
2525 
2526 !************************************************************************
2527 
2528  ija__ = [1, 1]; if (present(ija)) ija__ = ija
2529  ijb__ = [1, 1]; if (present(ijb)) ijb__ = ijb
2530  ijc__ = [1, 1]; if (present(ijc)) ijc__ = ijc
2531 
2532  mm  = matrix1%sizeb_global(1)
2533  nn  = matrix2%sizeb_global(2)
2534  kk  = matrix1%sizeb_global(2)
2535 
2536  if (toupper(transa) /= 'N') then
2537    mm = matrix1%sizeb_global(2)
2538    kk = matrix1%sizeb_global(1)
2539  end if
2540  if (toupper(transb) /= 'N') nn = matrix2%sizeb_global(1)
2541 
2542 #ifdef HAVE_LINALG_SCALAPACK
2543  ! pzgemm(transa, transb, m, n, k, alpha, a, ia, ja, desca, b, ib, jb, descb, beta, c, ic, jc, descc)
2544  if (matrix1%istwf_k /= 2) then
2545    call PZGEMM(transa, transb, mm, nn, kk, alpha, &
2546                matrix1%buffer_cplx, ija__(1), ija__(2), matrix1%descript%tab, &
2547                matrix2%buffer_cplx, ijb__(1), ijb__(2), matrix2%descript%tab, &
2548                beta, results%buffer_cplx, ijc__(1), ijc__(2), results%descript%tab)
2549  else
2550    call PDGEMM(transa, transb, mm, nn, kk, real(alpha, kind=dp), &
2551                matrix1%buffer_real, ija__(1), ija__(2), matrix1%descript%tab, &
2552                matrix2%buffer_real, ijb__(1), ijb__(2), matrix2%descript%tab, &
2553                real(beta, kind=dp), results%buffer_real, ijc__(1), ijc__(2), results%descript%tab)
2554  end if
2555 #endif
2556 
2557 end subroutine slk_pgemm_dp
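
 A minimal sketch (hypothetical names): C := A * B for three conforming complex
 distributed matrices; cone and czero are the complex constants from defs_basis:

    type(matrix_scalapack) :: amat, bmat, cmat
    call cmat%init(amat%sizeb_global(1), bmat%sizeb_global(2), amat%processor, amat%istwf_k)
    call slk_pgemm_dp("N", "N", amat, cone, bmat, czero, cmat)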

m_slk/slk_pgemm_sp [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_pgemm_sp

FUNCTION

  Extended matrix * matrix product: C := alpha*A*B + beta*C
  For a simple matrix vector product, one can simply pass alpha = cone and beta = czero

INPUTS

  matrix1= first ScaLAPACK matrix (matrix A)
  matrix2= second ScaLAPACK matrix (matrix B)
  alpha= scalar multiplier for the A*B product
  beta= scalar multiplier for the C matrix
  [ija, ijb, ijc]:  (global). The row and column indices in the distributed matrix A/B/C
    indicating the first row and the first column of the submatrix, respectively. Default [1, 1]

OUTPUT

  results= ScaLAPACK matrix coming out of the operation

NOTES

 The ESSL manual says that
      "matrices matrix1 and matrix2 must have no common elements otherwise, results are unpredictable."
 However, the official ScaLAPACK documentation does not report this (severe) limitation.

SOURCE

2588 subroutine slk_pgemm_sp(transa, transb, matrix1, alpha, matrix2, beta, results, &
2589                       ija, ijb, ijc) ! optional
2590 
2591 !Arguments ------------------------------------
2592  character(len=1),intent(in) :: transa, transb
2593  class(slkmat_sp_t),intent(in) :: matrix1, matrix2
2594  class(slkmat_sp_t),intent(inout) :: results
2595  complex(sp),intent(in) :: alpha, beta
2596  integer,optional,intent(in) :: ija(2), ijb(2), ijc(2)
2597 
2598 !Local variables-------------------------------
2599  integer :: mm, nn, kk, ija__(2), ijb__(2), ijc__(2)
2600 
2601 !************************************************************************
2602 
2603  ija__ = [1, 1]; if (present(ija)) ija__ = ija
2604  ijb__ = [1, 1]; if (present(ijb)) ijb__ = ijb
2605  ijc__ = [1, 1]; if (present(ijc)) ijc__ = ijc
2606 
2607  mm  = matrix1%sizeb_global(1)
2608  nn  = matrix2%sizeb_global(2)
2609  kk  = matrix1%sizeb_global(2)
2610 
2611  if (toupper(transa) /= 'N') then
2612    mm = matrix1%sizeb_global(2)
2613    kk = matrix1%sizeb_global(1)
2614  end if
2615  if (toupper(transb) /= 'N') nn = matrix2%sizeb_global(1)
2616 
2617 #ifdef HAVE_LINALG_SCALAPACK
 2618  ! pcgemm(transa, transb, m, n, k, alpha, a, ia, ja, desca, b, ib, jb, descb, beta, c, ic, jc, descc)
2619  if (matrix1%istwf_k /= 2) then
2620    call PCGEMM(transa, transb, mm, nn, kk, alpha, &
2621                matrix1%buffer_cplx, ija__(1), ija__(2), matrix1%descript%tab, &
2622                matrix2%buffer_cplx, ijb__(1), ijb__(2), matrix2%descript%tab, &
2623                beta, results%buffer_cplx, ijc__(1), ijc__(2), results%descript%tab)
2624  else
2625    call PSGEMM(transa, transb, mm, nn, kk, real(alpha, kind=sp), &
2626                matrix1%buffer_cplx, ija__(1), ija__(2), matrix1%descript%tab, &
2627                matrix2%buffer_cplx, ijb__(1), ijb__(2), matrix2%descript%tab, &
2628                real(beta, kind=sp), results%buffer_cplx, ijc__(1), ijc__(2), results%descript%tab)
2629  end if
2630 #endif
2631 
2632 end subroutine slk_pgemm_sp

m_slk/slk_ptrans [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_ptrans

FUNCTION

 Transposes a matrix

    sub( C ) := beta*sub( C ) + alpha*op( sub( A ) )

 where

    sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1),
    sub( A ) denotes A(IA:IA+N-1,JA:JA+M-1), and, op( X ) = X'.

 Thus, op( sub( A ) ) denotes A(IA:IA+N-1,JA:JA+M-1)'.
 Beta is a scalar, sub( C ) is an m by n submatrix, and sub( A ) is an n by m submatrix.

INPUTS

  trans: "N" for a plain transpose, "C" for the conjugate (Hermitian) transpose.
  [ija(2)]: (global) The row and column indices in the distributed matrix in_mat indicating
       the first row and the first column of the submatrix sub(A), respectively.
  [ijc(2)]: (global) The row and column indices in the distributed matrix out_mat
   indicating the first row and the first column of the submatrix sub(C), respectively.
  [out_gshape(2)]: Global shape of the output (sub)matrix. Default: the transposed shape of in_mat.
  [alpha, beta]: Scalars entering sub(C) := beta*sub(C) + alpha*op(sub(A)). Default: alpha=1, beta=0.
  [free]: True to deallocate in_mat. Default: False.

SOURCE

4577 subroutine slk_ptrans(in_mat, trans, out_mat, &
4578                       out_gshape, ija, ijc, size_blocs, alpha, beta, free) ! optional
4579 
4580 !Arguments ------------------------------------
4581  class(matrix_scalapack),intent(inout) :: in_mat
4582  character(len=1),intent(in) :: trans
4583  class(matrix_scalapack),intent(inout) :: out_mat
4584  integer,optional,intent(in) :: out_gshape(2), size_blocs(2), ija(2), ijc(2)
4585  complex(dp),optional,intent(in) :: alpha, beta
4586  logical,optional,intent(in) :: free
4587 
4588 !Local variables-------------------------------
4589  integer :: sb, mm, nn, size_blocs__(2)
4590  real(dp) :: ralpha__, rbeta__
4591  integer :: ija__(2), ijc__(2)
4592  complex(dp) :: calpha__, cbeta__
4593 
4594 ! *************************************************************************
4595 
4596  ija__ = [1, 1]; if (present(ija)) ija__ = ija
4597  ijc__ = [1, 1]; if (present(ijc)) ijc__ = ijc
4598 
4599  ! transposed output (sub)matrix has shape (nn, mm)
4600  if (present(out_gshape)) then
4601    nn = out_gshape(1)
4602    mm = out_gshape(2)
4603  else
4604    nn = in_mat%sizeb_global(2)
4605    mm = in_mat%sizeb_global(1)
4606  end if
4607 
4608  if (present(size_blocs)) then
4609    size_blocs__ = size_blocs
4610  else
4611    ! FIXME: This can cause problems if I start to use round-robin distribution in GWR!!!!!
4612    size_blocs__(1) = in_mat%sizeb_global(2)
4613    sb = in_mat%sizeb_global(1) / in_mat%processor%grid%dims(2)
4614    if (mod(in_mat%sizeb_global(1), in_mat%processor%grid%dims(2)) /= 0) sb = sb + 1
4615    size_blocs__(2) = sb
4616    !size_blocs__(2) = in_mat%sizeb_blocs(1); size_blocs__(1) = in_mat%sizeb_blocs(2)
4617  end if
4618 
4619  call out_mat%init(nn, mm, in_mat%processor, in_mat%istwf_k, size_blocs=size_blocs__)
4620 
4621  ! prototype: call pdtran(m, n, alpha, a, ia, ja, desca, beta, c, ic, jc, descc)
4622 
4623  if (allocated(in_mat%buffer_cplx)) then
4624 #ifdef HAVE_LINALG_SCALAPACK
4625    select case (trans)
4626    case ("N")
4627      ! sub(C) := beta*sub(C) + alpha*sub(A)',
4628      calpha__ = cone; if (present(alpha)) calpha__ = alpha
4629      cbeta__ = czero; if (present(beta)) cbeta__ = beta
4630      call pztranu(nn, mm, calpha__, in_mat%buffer_cplx, ija__(1), ija__(2), &
4631                   in_mat%descript%tab, cbeta__, out_mat%buffer_cplx, ijc__(1), ijc__(2), out_mat%descript%tab)
4632 
4633    case ("C")
4634      ! sub(C) := beta * sub(C) + alpha * conjg(sub(A)')
4635      calpha__ = cone; if (present(alpha)) calpha__ = alpha
4636      cbeta__ = czero; if (present(beta)) cbeta__ = beta
4637      call pztranc(nn, mm, calpha__, in_mat%buffer_cplx, ija__(1), ija__(2), &
4638                   in_mat%descript%tab, cbeta__, out_mat%buffer_cplx, ijc__(1), ijc__(2), out_mat%descript%tab)
4639 
4640    case default
4641      ABI_ERROR(sjoin("Invalid value for trans:", trans))
4642    end select
4643 
4644  else if (allocated(in_mat%buffer_real)) then
4645      ralpha__ = one; if (present(alpha)) ralpha__ = real(alpha)
4646      rbeta__ = zero; if (present(beta)) rbeta__ = real(beta)
4647      call pdtran(nn, mm, ralpha__, in_mat%buffer_real, ija__(1), ija__(2), &
4648                  in_mat%descript%tab, rbeta__, out_mat%buffer_real, ijc__(1), ijc__(2), out_mat%descript%tab)
4649 #endif
4650  else
4651    ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!")
4652  end if
4653 
4654  if (present(free)) then
4655    if (free) call in_mat%free()
4656  end if
4657 
4658 end subroutine slk_ptrans
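
 A hedged usage sketch: build the Hermitian conjugate of a distributed matrix.
 amat is assumed to be an initialized matrix_scalapack with an allocated complex
 buffer; out_mat is initialized inside slk_ptrans with the transposed global shape.

    type(matrix_scalapack) :: amat, ahmat

    ! ahmat := conjg(transpose(amat)); free=.true. deallocates amat on exit.
    call slk_ptrans(amat, "C", ahmat, free=.true.)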

m_slk/slk_pzheevx [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_pzheevx

FUNCTION

  slk_pzheevx computes selected eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix A.
  A * X = lambda * X

INPUTS

  mat=ScaLAPACK matrix (matrix A)

  vec=The distributed eigenvectors X. Not referenced if JOBZ="N"

  JOBZ    (global input) CHARACTER*1
          Specifies whether or not to compute the eigenvectors:
          = "N":  Compute eigenvalues only.
          = "V":  Compute eigenvalues and eigenvectors.

  RANGE   (global input) CHARACTER*1
          = "A": all eigenvalues will be found.
          = "V": all eigenvalues in the interval [VL,VU] will be found.
          = "I": the IL-th through IU-th eigenvalues will be found.

  UPLO    (global input) CHARACTER*1
          Specifies whether the upper or lower triangular part of the Hermitian matrix A is stored:
          = "U":  Upper triangular
          = "L":  Lower triangular

  VL      (global input) DOUBLE PRECISION
          If RANGE="V", the lower bound of the interval to be searched for eigenvalues.
          Not referenced if RANGE = "A" or "I".

  VU      (global input) DOUBLE PRECISION
          If RANGE="V", the upper bound of the interval to be searched for eigenvalues.
          Not referenced if RANGE = "A" or "I".

  IL     (global input) integer
         If RANGE="I", the index (from smallest to largest) of the smallest eigenvalue to be returned. IL >= 1.
         Not referenced if RANGE = "A" or "V".

  IU     (global input) integer
         If RANGE="I", the index (from smallest to largest) of the largest eigenvalue to be returned.
         min(IL,N) <= IU <= N. Not referenced if RANGE = "A" or "V".

  ABSTOL  (global input) DOUBLE PRECISION
          If JOBZ="V", setting ABSTOL to PDLAMCH( CONTEXT, "U") yields the most orthogonal eigenvectors.
          ABSTOL is the absolute error tolerance for the eigenvalues: an approximate eigenvalue is accepted
          as converged when it is determined to lie in an interval [a,b] of width less than or equal to

           ABSTOL + EPS * max( |a|, |b| ),

          where EPS is the machine precision. If ABSTOL is less than or equal to zero, then EPS*norm(T) is
          used in its place, where norm(T) is the 1-norm of the tridiagonal matrix obtained by reducing A to
          tridiagonal form. Eigenvalues are computed most accurately when ABSTOL is set to twice the underflow
          threshold 2*PDLAMCH("S"), not zero. If this routine returns with
          ((MOD(INFO,2).NE.0) .OR. (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or eigenvectors
          did not converge, try setting ABSTOL to 2*PDLAMCH("S").

OUTPUT

  mene_found= (global output) Total number of eigenvalues found.  0 <= mene_found <= N.
  eigen(N)= (global output) Eigenvalues of A, where N is the global dimension of the matrix.
            On normal exit, the first mene_found entries contain the selected eigenvalues in ascending order.

SIDE EFFECTS

  If JOBZ="V", the local buffer vec%buffer_cplx will contain part of the distributed eigenvectors.
  mat%buffer_cplx is destroyed when the routine returns.

SOURCE

3763 subroutine slk_pzheevx(mat, jobz, range, uplo, vl, vu, il, iu, abstol, vec, mene_found, eigen)
3764 
3765 !Arguments ------------------------------------
3766  class(matrix_scalapack),intent(inout) :: mat
3767  integer,intent(in) :: il, iu
3768  integer,intent(out) :: mene_found
3769  real(dp),intent(in) :: abstol,vl,vu
3770  character(len=*),intent(in) :: jobz,range,uplo
3771  class(matrix_scalapack),intent(inout) :: vec
3772 !arrays
3773  real(dp),intent(out) :: eigen(*)
3774 
3775 #ifdef HAVE_LINALG_SCALAPACK
3776 !Local variables-------------------------------
3777 !scalars
3778  integer  :: lwork,lrwork,liwork,info,nvec_calc !,ierr
3779  real(dp) :: orfac
3780  character(len=500) :: msg
3781 !arrays
3782  !integer :: ibuff(3),max_ibuff(3)
3783  integer,allocatable  :: iwork(:),iclustr(:),ifail(:)
3784  real(dp),allocatable  :: rwork(:),gap(:)
3785  complex(dpc),allocatable :: work(:)
3786 
3787 !************************************************************************
3788 
3789  ABI_CHECK(allocated(mat%buffer_cplx), "buffer_cplx is not allocated!")
3790 
3791  ! abstol = PDLAMCH(vec%processor%grid%ictxt,'U')
3792 
 3793  orfac  = -one ! Only for eigenvectors: use default value 1.0d-3.
3794  ! Vectors within orfac*norm(A) will be reorthogonalized.
3795 
3796  ! Allocate the arrays for the results of the calculation
3797  ABI_MALLOC(gap, (mat%processor%grid%dims(1) * mat%processor%grid%dims(2)))
3798 
3799  if (firstchar(jobz, ["V","v"])) then
3800    ABI_MALLOC(ifail, (mat%sizeb_global(2)))
3801    ABI_MALLOC(iclustr, (2*mat%processor%grid%dims(1) * mat%processor%grid%dims(2)))
3802  end if
3803 
3804  ! Get the optimal size of the work arrays.
3805  lwork=-1; lrwork=-1; liwork=-1
3806  ABI_MALLOC(work, (1))
3807  ABI_MALLOC(iwork, (1))
3808  ABI_MALLOC(rwork, (3))
3809  ! This is clearly seen in the source in which rwork(1:3) is accessed
3810  ! in the calculation of the workspace size.
3811 
3812   ! prototype
3813   !call pzheevx(jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il, iu, abstol, m, nz, w,
3814   !             orfac, z, iz, jz, descz, work, lwork, rwork, lrwork, iwork, liwork, ifail, iclustr, gap, info)
3815 
3816   call PZHEEVX(jobz,range,uplo, mat%sizeb_global(2),mat%buffer_cplx,1,1,mat%descript%tab,&
3817     vl,vu,il,iu,abstol,mene_found,nvec_calc,eigen,orfac,&
3818     vec%buffer_cplx,1,1,vec%descript%tab,&
3819     work,lwork,rwork,lrwork,iwork,liwork,ifail,iclustr,gap,info)
3820 
3821   ABI_CHECK(info == 0, sjoin("Problem to compute workspace, info:", itoa(info)))
3822 
3823   lwork  = NINT(real(work(1)),kind=dp)
3824   lrwork = NINT(rwork(1))
3825   liwork = iwork(1)
3826 
3827   ABI_FREE(work)
3828   ABI_FREE(rwork)
3829   ABI_FREE(iwork)
3830   !
3831   ! FROM THE SCALAPACK MAN PAGE:
3832   ! The computed eigenvectors may not be orthogonal if the minimal workspace is supplied and ORFAC is too
3833   ! small. If you  want to guarantee orthogonality (at the cost of potentially poor performance) you should
3834   ! add the following to LRWORK: (CLUSTERSIZE-1)*N where CLUSTERSIZE is  the  number  of  eigenvalues  in  the
3835   ! largest cluster, where a cluster is defined as a set of close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
3836   ! W(J+1) <= W(J) + ORFAC*2*norm(A) }.
3837 
3838   if (firstchar(jobz, ["V","v"])) then
3839     lrwork = INT( lrwork + mat%sizeb_global(2) *(mat%sizeb_global(2)-1) )
3840   end if
3841 
3842   ! ibuff(1) = lwork
3843   ! ibuff(2) = lrwork !INT(lrwork + mat%sizeb_global(2) *(mat%sizeb_global(2)-1)
3844   ! ibuff(3) = liwork
3845 
3846   ! Get the maximum of sizes of the work arrays processor%comm
3847   ! call MPI_ALLREDUCE(ibuff,max_ibuff,3,MPI_integer,MPI_MAX,comm,ierr)
3848 
3849   ! lwork  = max_ibuff(1)
3850   ! lrwork = max_ibuff(2)
3851   ! liwork = max_ibuff(3)
3852 
3853   ABI_MALLOC(work , (lwork ))
3854   ABI_MALLOC(rwork, (lrwork))
3855   ABI_MALLOC(iwork, (liwork))
3856 
3857   ! prototype
3858   !call pzheevx(jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il, iu, abstol, m, nz, w,
3859   !             orfac, z, iz, jz, descz, work, lwork, rwork, lrwork, iwork, liwork, ifail, iclustr, gap, info)
3860 
3861   ! Call the scaLAPACK routine.
3862   ! write(std_out,*) 'I am using PZHEEVX'
3863   call PZHEEVX(jobz,range,uplo, mat%sizeb_global(2),mat%buffer_cplx,1,1,mat%descript%tab,&
3864     vl,vu,il,iu,abstol,mene_found,nvec_calc, eigen,orfac,&
3865     vec%buffer_cplx,1,1,vec%descript%tab,&
3866     work,lwork,rwork,lrwork,iwork,liwork,ifail,iclustr,gap,info)
3867 
3868  ! TODO
3869  !call pxheevx_info_to_msg(info, jobz, nvec_calc, mene_found, msg)
3870  !ABI_CHECK(info == 0, msg)
3871 
3872  ! Handle possible error.
3873  if (info < 0) then
3874    write(msg,'(a,i0,a)')" The ", -info, "-th argument of P?HEEVX had an illegal value."
3875    if (info == -25) msg = " LRWORK is too small to compute all the eigenvectors requested, no computation is performed"
3876    ABI_ERROR(msg)
3877  end if
3878 
3879  if (info > 0) then
3880    write(msg,'(a,i0)') " P?HEEVX returned info: ",info
3881    call wrtout(std_out, msg)
3882    if (MOD(info, 2) /= 0)then
3883      write(msg,'(3a)')&
3884      " One or more eigenvectors failed to converge. ",ch10,&
3885      " Their indices are stored in IFAIL. Ensure ABSTOL=2.0*PDLAMCH('U')"
3886      call wrtout(std_out, msg)
3887    end if
3888    if (MOD(info / 2, 2) /= 0) then
3889      write(msg,'(5a)')&
3890      " Eigenvectors corresponding to one or more clusters of eigenvalues ",ch10,&
3891      " could not be reorthogonalized because of insufficient workspace. ",ch10,&
3892      " The indices of the clusters are stored in the array ICLUSTR."
3893      call wrtout(std_out, msg)
3894    end if
3895    if (MOD(info / 4, 2) /= 0) then
3896      write(msg,'(3a)')" Space limit prevented PZHEEVX from computing all of the eigenvectors between VL and VU. ",ch10,&
3897       " The number of eigenvectors computed is returned in NZ."
3898      call wrtout(std_out, msg)
3899    end if
3900    if (MOD(info / 8, 2) /= 0) then
3901      call wrtout(std_out, "PZSTEBZ failed to compute eigenvalues. Ensure ABSTOL=2.0*PDLAMCH('U')")
3902    end if
3903    ABI_ERROR("Cannot continue")
3904  end if
3905 
 3906  ! Check the number of eigenvalues found wrt the number of vectors calculated.
3907  if ( firstchar(jobz, ['V','v']) .and. mene_found /= nvec_calc) then
3908    write(msg,'(5a)')&
3909    " The user supplied insufficient space and PZHEEVX is not able to detect this before beginning computation. ",ch10,&
3910    " To get all the eigenvectors requested, the user must supply both sufficient space to hold the ",ch10,&
3911    " eigenvectors in Z (M .LE. DESCZ(N_)) and sufficient workspace to compute them. "
3912    !ierr = huge(1)
3913    ABI_ERROR(msg)
3914  end if
3915 
3916  ABI_FREE(work)
3917  ABI_FREE(rwork)
3918  ABI_FREE(iwork)
3919  ABI_FREE(gap)
3920 
3921  ABI_SFREE(ifail)
3922  ABI_SFREE(iclustr)
3923 #endif
3924 
3925 end subroutine slk_pzheevx
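
 A hedged usage sketch: compute the ten lowest eigenpairs of a distributed Hermitian
 matrix hmat of global dimension nn (both assumed to be set up by the caller). With
 range="I", vl and vu are not referenced, and abstol <= 0 selects the default
 tolerance EPS*norm(T).

    type(matrix_scalapack) :: hmat, eigvec
    integer :: mene_found
    real(dp),allocatable :: eigen(:)

    ABI_MALLOC(eigen, (nn))
    call eigvec%init(nn, nn, hmat%processor, hmat%istwf_k)
    call slk_pzheevx(hmat, "V", "I", "U", zero, zero, 1, 10, -one, &
                     eigvec, mene_found, eigen)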

m_slk/slk_pzhegvx [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_pzhegvx

FUNCTION

  slk_pzhegvx provides an object-oriented interface to the ScaLAPACK routine PZHEGVX that
  computes selected eigenvalues and, optionally, eigenvectors of a complex generalized
  Hermitian-definite eigenproblem, of the form
  sub( A )*x=(lambda)*sub( B )*x,  sub( A )*sub( B )x=(lambda)*x,  or sub( B )*sub( A )*x=(lambda)*x.
  Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be
  Hermitian, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed
  to be Hermitian positive definite.

INPUTS

  Slk_matA<matrix_scalapack>=ScaLAPACK matrix (matrix A)
  Slk_matB<matrix_scalapack>=ScaLAPACK matrix (matrix B)
  Slk_vec<matrix_scalapack>=The distributed eigenvectors X. Not referenced if JOBZ="N"

  IBtype   (global input) integer
          Specifies the problem type to be solved:
          = 1:  sub( A )*x = (lambda)*sub( B )*x
          = 2:  sub( A )*sub( B )*x = (lambda)*x
          = 3:  sub( B )*sub( A )*x = (lambda)*x

  JOBZ    (global input) CHARACTER*1
          Specifies whether or not to compute the eigenvectors:
          = "N":  Compute eigenvalues only.
          = "V":  Compute eigenvalues and eigenvectors.

  RANGE   (global input) CHARACTER*1
          = "A": all eigenvalues will be found.
          = "V": all eigenvalues in the interval [VL,VU] will be found.
          = "I": the IL-th through IU-th eigenvalues will be found.

  UPLO    (global input) CHARACTER*1
          Specifies whether the upper or lower triangular part of the Hermitian matrix sub(A) and sub(B) is stored:
          = "U":  Upper triangular
          = "L":  Lower triangular

  VL      (global input) DOUBLE PRECISION
          If RANGE="V", the lower bound of the interval to be searched for eigenvalues.
          Not referenced if RANGE = "A" or "I".

  VU      (global input) DOUBLE PRECISION
          If RANGE="V", the upper bound of the interval to be searched for eigenvalues.
          Not referenced if RANGE = "A" or "I".

  IL     (global input) integer
         If RANGE="I", the index (from smallest to largest) of the smallest eigenvalue to be returned. IL >= 1.
         Not referenced if RANGE = "A" or "V".

  IU     (global input) integer
         If RANGE="I", the index (from smallest to largest) of the largest eigenvalue to be returned.
         min(IL,N) <= IU <= N. Not referenced if RANGE = "A" or "V".

  ABSTOL  (global input) DOUBLE PRECISION
          If JOBZ="V", setting ABSTOL to PDLAMCH( CONTEXT, "U") yields the most orthogonal eigenvectors.
          ABSTOL is the absolute error tolerance for the eigenvalues: an approximate eigenvalue is accepted
          as converged when it is determined to lie in an interval [a,b] of width less than or equal to

           ABSTOL + EPS * max( |a|, |b| ),

          where EPS is the machine precision. If ABSTOL is less than or equal to zero, then EPS*norm(T) is
          used in its place, where norm(T) is the 1-norm of the tridiagonal matrix obtained by reducing A to
          tridiagonal form. Eigenvalues are computed most accurately when ABSTOL is set to twice the underflow
          threshold 2*PDLAMCH("S"), not zero. If this routine returns with
          ((MOD(INFO,2).NE.0) .OR. (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or eigenvectors
          did not converge, try setting ABSTOL to 2*PDLAMCH("S").

OUTPUT

  mene_found= (global output) Total number of eigenvalues found.  0 <= mene_found <= N.

  eigen(N)= (global output) Eigenvalues of A, where N is the global dimension of the matrix.
            On normal exit, the first mene_found entries contain the selected eigenvalues in ascending order.

SIDE EFFECTS

  Slk_vec<matrix_scalapack>:
   %buffer_cplx local output, global dimension (N,N).
     If JOBZ = 'V', then on normal exit the first M columns of Z
     contain the orthonormal eigenvectors of the matrix
     corresponding to the selected eigenvalues.
     If JOBZ = 'N', then Z is not referenced.

  Slk_matA<matrix_scalapack>:
    %buffer_cplx
      (local input/local output) complex(DPC) pointer into the
      local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
      On entry, this array contains the local pieces of the
      N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U',
      the leading N-by-N upper triangular part of sub( A ) contains
      the upper triangular part of the matrix.  If UPLO = 'L', the
      leading N-by-N lower triangular part of sub( A ) contains
      the lower triangular part of the matrix.

      On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains
      the distributed matrix Z of eigenvectors.  The eigenvectors
      are normalized as follows:
      if IBtype = 1 or 2, Z**H*sub( B )*Z = I;
      if IBtype = 3, Z**H*inv( sub( B ) )*Z = I.
      If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
      or the lower triangle (if UPLO='L') of sub( A ), including
      the diagonal, is destroyed.

  Slk_matB=
    %buffer_cplx
      (local input/local output) complex(DPC) pointer into the
      local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
      On entry, this array contains the local pieces of the
      N-by-N Hermitian distributed matrix sub( B ). If UPLO = 'U',
      the leading N-by-N upper triangular part of sub( B ) contains
      the upper triangular part of the matrix.  If UPLO = 'L', the
      leading N-by-N lower triangular part of sub( B ) contains
      the lower triangular part of the matrix.

      On exit, if INFO <= N, the part of sub( B ) containing the
      matrix is overwritten by the triangular factor U or L from
      the Cholesky factorization sub( B ) = U**H*U or
      sub( B ) = L*L**H.

SOURCE

4050 subroutine slk_pzhegvx(Slk_matA, ibtype, jobz, range, uplo, Slk_matB, vl, vu, il, iu, abstol, Slk_vec, mene_found, eigen)
4051 
4052 !Arguments ------------------------------------
4053  class(matrix_scalapack),intent(inout) :: Slk_matA
4054  integer,intent(in) :: il,iu,ibtype
4055  integer,intent(out) :: mene_found
4056  real(dp),intent(in) :: abstol,vl,vu
4057  character(len=*),intent(in) :: jobz,range,uplo
4058  class(matrix_scalapack),intent(inout) :: Slk_matB
4059  class(matrix_scalapack),intent(inout) :: Slk_vec
4060 !arrays
4061  real(dp),intent(out) :: eigen(*)
4062 
4063 #ifdef HAVE_LINALG_SCALAPACK
4064 !Local variables-------------------------------
4065 !scalars
4066  integer  :: lwork,lrwork,liwork,info,nvec_calc !,ierr
4067  real(dp) :: orfac
4068  logical :: ltest
4069  character(len=500) :: msg
4070 !arrays
4071  !integer :: ibuff(3),max_ibuff(3)
4072  integer :: desca(DLEN_),descb(DLEN_),descz(DLEN_)
4073  integer,allocatable  :: iwork(:),iclustr(:),ifail(:)
4074  real(dp),allocatable  :: rwork(:),gap(:)
4075  complex(dpc),allocatable :: work(:)
4076 
4077 !************************************************************************
4078 
4079  ABI_CHECK(allocated(Slk_matA%buffer_cplx), "buffer_cplx is not allocated!")
4080 
 4081  ! abstol = PDLAMCH(Slk_vec%processor%grid%ictxt,'U')
4082 
 4083  orfac  = -one ! Only for eigenvectors: use default value 1.0d-3.
4084  ! Vectors within orfac*norm(A) will be reorthogonalized.
4085 
4086  ! ======================
4087  ! Alignment requirements
4088  ! ======================
4089  ! The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1),
 4090  ! and B( IB:IB+N-1, JB:JB+N-1 ) must satisfy certain alignment properties:
4091 
4092  desca = Slk_matA%descript%tab
4093  descb = Slk_matB%descript%tab
4094  if (firstchar(jobz, ["V", "v"])) then
4095    descz = Slk_vec%descript%tab
4096  else
4097    descz = Slk_matA%descript%tab
4098  end if
4099 
4100  ltest = .TRUE.
4101  ltest = ltest .and. (DESCA(MB_) == DESCA(NB_))
4102  !IA = IB = IZ
 4103  !JA = JB = JZ
4104  ltest = ltest .and.ALL(DESCA(M_   ) == [DESCB(M_   ), DESCZ(M_   )])
4105  ltest = ltest .and.ALL(DESCA(N_   ) == [DESCB(N_   ), DESCZ(N_   )])
4106  ltest = ltest .and.ALL(DESCA(MB_  ) == [DESCB(MB_  ), DESCZ(MB_  )])
4107  ltest = ltest .and.ALL(DESCA(NB_  ) == [DESCB(NB_  ), DESCZ(NB_  )])
4108  ltest = ltest .and.ALL(DESCA(RSRC_) == [DESCB(RSRC_), DESCZ(RSRC_)])
4109  ltest = ltest .and.ALL(DESCA(CSRC_) == [DESCB(CSRC_), DESCZ(CSRC_)])
4110  !MOD( IA-1, DESCA( MB_ ) ) = 0
4111  !MOD( JA-1, DESCA( NB_ ) ) = 0
4112  !MOD( IB-1, DESCB( MB_ ) ) = 0
4113  !MOD( JB-1, DESCB( NB_ ) ) = 0
4114 
4115  if (.not.ltest) then
4116    ABI_ERROR("Alignment requirements not satisfied, check the caller")
4117  end if
4118 
4119  !Allocate the arrays for the results of the calculation
4120  ABI_MALLOC(gap, (Slk_matA%processor%grid%dims(1) * Slk_matA%processor%grid%dims(2)))
4121 
4122  if (firstchar(jobz, ["V","v"])) then
4123    ABI_MALLOC(ifail,(Slk_matA%sizeb_global(2)))
4124    ABI_MALLOC(iclustr,( 2*Slk_matA%processor%grid%dims(1) * Slk_matA%processor%grid%dims(2)))
4125  else
4126    ABI_MALLOC(ifail,(1))
4127  end if
4128 
4129  ! Get the optimal size of the work arrays.
4130  lwork=-1; lrwork=-1; liwork=-1
4131  ABI_MALLOC(work,(1))
4132  ABI_MALLOC(iwork,(1))
4133  ABI_MALLOC(rwork,(3))
4134  ! This is clearly seen in the source in which rwork(1:3) is accessed
 4135  ! in the calculation of the workspace size.
4136 
4137  call pzhegvx(ibtype,jobz,range,uplo, Slk_matA%sizeb_global(2),Slk_matA%buffer_cplx,1,1,Slk_matA%descript%tab,&
4138    Slk_matB%buffer_cplx,1,1,Slk_matB%descript%tab,&
4139    vl,vu,il,iu,abstol,mene_found,nvec_calc,eigen,orfac,&
4140    Slk_vec%buffer_cplx,1,1,Slk_vec%descript%tab,&
4141    work,lwork,rwork,lrwork,iwork,liwork,ifail,iclustr,gap,info)
4142 
4143  ABI_CHECK(info == 0, sjoin("Problem to compute workspace, info:", itoa(info)))
4144 
4145  lwork  = NINT(real(work(1)),kind=dp)
4146  lrwork = NINT(rwork(1))
4147  liwork = iwork(1)
4148 
4149  ABI_FREE(work)
4150  ABI_FREE(rwork)
4151  ABI_FREE(iwork)
4152 
4153  !FROM THE SCALAPACK MAN PAGE:
4154  !The computed eigenvectors may not be orthogonal if the minimal workspace is supplied and ORFAC is too
4155  !small. If you  want to guarantee orthogonality (at the cost of potentially poor performance) you should
4156  !add the following to LRWORK: (CLUSTERSIZE-1)*N where CLUSTERSIZE is  the  number  of  eigenvalues  in  the
4157  !largest cluster, where a cluster is defined as a set of close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
4158  !W(J+1) <= W(J) + ORFAC*2*norm(A) }.
4159 
4160  if (firstchar(jobz, ["V","v"])) then
4161    lrwork = INT( lrwork + Slk_matA%sizeb_global(2) *(Slk_matA%sizeb_global(2)-1) )
4162  end if
4163 
4164  !ibuff(1) = lwork
4165  !ibuff(2) = lrwork !INT(lrwork + Slk_matA%sizeb_global(2) *(Slk_matA%sizeb_global(2)-1)
4166  !ibuff(3) = liwork
4167 
4168  !Get the maximum of sizes of the work arrays processor%comm
4169  !call MPI_ALLREDUCE(ibuff,max_ibuff,3,MPI_integer,MPI_MAX,comm,ierr)
4170 
4171  !lwork  = max_ibuff(1)
4172  !lrwork = max_ibuff(2)
4173  !liwork = max_ibuff(3)
4174 
4175  ABI_MALLOC(work , (lwork ))
4176  ABI_MALLOC(rwork, (lrwork))
4177  ABI_MALLOC(iwork, (liwork))
4178 
4179  ! Call the scaLAPACK routine.
4180  ! write(std_out,*) 'I am using PZHEGVX'
4181  call pzhegvx(ibtype,jobz,range,uplo, Slk_matA%sizeb_global(2),Slk_matA%buffer_cplx,1,1,Slk_matA%descript%tab,&
4182     Slk_matB%buffer_cplx,1,1,Slk_matB%descript%tab,&
4183     vl,vu,il,iu,abstol,mene_found,nvec_calc, eigen,orfac,&
4184     Slk_vec%buffer_cplx,1,1,Slk_vec%descript%tab,&
4185    work,lwork,rwork,lrwork,iwork,liwork,ifail,iclustr,gap,info)
4186 
4187  ! Handle the possible error.
4188  if (info < 0) then
4189    write(msg,'(a,i7,a)')" The ",-info,"-th argument of PZHEGVX had an illegal value."
4190    if (info==-25) msg = " LRWORK is too small to compute all the eigenvectors requested, no computation is performed"
4191    ABI_ERROR(msg)
4192  end if
4193 
4194  if (info > 0) then
4195    write(msg,'(a,i0)') " PZHEGVX returned info: ",info
4196    call wrtout(std_out, msg)
4197    if (MOD(info,2)/=0)then
4198      write(msg,'(3a)')&
4199      " One or more eigenvectors failed to converge. ",ch10,&
4200      " Their indices are stored in IFAIL. Ensure ABSTOL=2.0*PDLAMCH('U')"
4201      call wrtout(std_out, msg)
4202    end if
4203    if (MOD(info / 2, 2) /= 0) then
4204      write(msg,'(5a)')&
4205      " Eigenvectors corresponding to one or more clusters of eigenvalues ",ch10,&
4206      " could not be reorthogonalized because of insufficient workspace. ",ch10,&
4207      " The indices of the clusters are stored in the array ICLUSTR."
4208      call wrtout(std_out, msg)
4209    end if
4210    if (MOD(info / 4, 2) /= 0) then
4211      write(msg,'(3a)')&
4212      " Space limit prevented PZHEGVX from computing all of the eigenvectors between VL and VU. ",ch10,&
4213      " The number of eigenvectors  computed  is returned in NZ."
4214      call wrtout(std_out, msg)
4215    end if
4216    if (MOD(info / 8, 2) /= 0) then
4217      msg = " PZSTEBZ  failed to compute eigenvalues. Ensure ABSTOL=2.0*PDLAMCH('U')"
4218      call wrtout(std_out, msg)
4219    end if
4220    if (MOD(info / 16, 2) /= 0) then
4221      write(msg,'(3a)')&
4222      " B was not positive definite.",ch10,&
4223      " IFAIL(1) indicates the order of the smallest minor which is not positive definite."
4224      call wrtout(std_out, msg)
4225    end if
4226    ABI_ERROR("Cannot continue")
4227  end if
4228 
 4229  ! Check the number of eigenvalues found wrt the number of vectors calculated.
4230  if ( firstchar(jobz, ['V','v']) .and. mene_found/=nvec_calc) then
4231    write(msg,'(5a)')&
4232    " The user supplied insufficient space and PZHEGVX is not able to detect this before beginning computation. ",ch10,&
4233    " To get all the  eigenvectors requested, the user must supply both sufficient space to hold the ",ch10,&
4234    " eigenvectors in Z (M .LE. DESCZ(N_)) and sufficient workspace to compute them. "
4235    ABI_ERROR(msg)
4236  end if
4237 
4238  ABI_FREE(work)
4239  ABI_FREE(rwork)
4240  ABI_FREE(iwork)
4241  ABI_FREE(gap)
4242  ABI_FREE(ifail)
4243  ABI_SFREE(iclustr)
4244 #endif
4245 
4246 end subroutine slk_pzhegvx
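
 A hedged sketch for the generalized problem sub(A)*x = lambda*sub(B)*x (ibtype=1),
 under the same assumptions as the slk_pzheevx example above. A, B and the eigenvector
 matrix must share block sizes and source processors to satisfy the alignment checks;
 with range="A", vl, vu, il and iu are not referenced.

    type(matrix_scalapack) :: amat, bmat, eigvec
    integer :: mene_found
    real(dp),allocatable :: eigen(:)

    ABI_MALLOC(eigen, (nn))
    call eigvec%init(nn, nn, amat%processor, amat%istwf_k)
    ! All eigenpairs of A*x = lambda*B*x; on exit bmat holds its Cholesky factor.
    call slk_pzhegvx(amat, 1, "V", "A", "U", bmat, zero, zero, 1, nn, -one, &
                     eigvec, mene_found, eigen)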

m_slk/slk_read [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_read

FUNCTION

  Routine to read a square scaLAPACK distributed matrix from an external file using MPI-IO.

INPUTS

  uplo=String specifying whether only the upper or lower triangular part of the global matrix is stored on disk:
    = "U":  Upper triangular is stored
    = "L":  Lower triangular is stored
    = "A":  Full matrix (used for general complex matrices)
  symtype=Symmetry type of the matrix stored on disk (used only if uplo = "U" or "L").
    = "H" for a Hermitian matrix.
    = "S" for a symmetric matrix.
    = "N" if the matrix has no symmetry (not compatible with uplo="U" or uplo="L").
  is_fortran_file=.FALSE. if a C stream is used, .TRUE. for Fortran binary files.
  [fname]= Mutually exclusive with mpi_fh. The name of the external file from which the matrix will be read.
           The file is opened and closed inside the routine with the MPI flags specified by flags.
  [mpi_fh]=File handler associated to the file (already opened in the caller). Not compatible with fname.
  [flags]=MPI-IO flags used to open the file in MPI_FILE_OPEN. Default is MPI_MODE_RDONLY. Referenced only when fname is used.

SIDE EFFECTS

  Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer
    supposed to be allocated.
    %buffer_cplx=Local buffer containing the distributed matrix stored on the external file.
  If fname is present then the file is opened and closed inside the routine. Any exception is fatal.
  [offset]=
    input:  Offset used to access the content of the file. Default is zero.
    output: New offset incremented with the byte size of the matrix that has been read (Fortran
            markers are included if is_fortran_file=.TRUE.)

TODO

  - Generalize the implementation adding the reading of the real buffer.

  - This routine is not portable as this kind of access pattern is not supported by all MPI implementations
    E.g. with MPICH we have

 --- !ERROR
 src_file: m_slk.F90
 src_line: 3780
 mpi_rank: 1
 message: |
     SET_VIEW
     Other I/O error , error stack:
     ADIO_Set_view(48):  **iobadoverlap displacements of filetype must be in a monotonically nondecreasing order
 ...

 - This routine should be removed and replaced by hdf5 + mpi-io

SOURCE

5655 subroutine slk_read(Slk_mat,uplo,symtype,is_fortran_file,fname,mpi_fh,offset,flags)
5656 
5657 !Arguments ------------------------------------
5658 !scalars
5659  integer,optional,intent(in) :: flags,mpi_fh
5660  integer(XMPI_OFFSET_KIND),optional,intent(inout) :: offset
5661  character(len=*),optional,intent(in) :: fname
5662  character(len=*),intent(in) :: uplo,symtype
5663  logical,intent(in) :: is_fortran_file
5664  class(matrix_scalapack),intent(inout) :: Slk_mat
5665 
5666 !Local variables ------------------------------
5667 #if defined HAVE_LINALG_SCALAPACK && defined HAVE_MPI_IO
5668 !scalars
5669  integer :: nrows_glob,offset_err,slk_type,etype
5670  integer(XMPI_OFFSET_KIND) :: my_offset
5671  logical :: do_open
5672  integer :: comm,my_flags,my_fh,buffer_size,ierr,col_glob
5673  integer :: nfrec,bsize_elm,mpi_type_elm
5674  !complex(dpc) :: ctest
5675  logical,parameter :: check_frm=.TRUE.
5676  integer(XMPI_OFFSET_KIND),allocatable :: bsize_frecord(:)
5677 !arrays
5678  character(len=500) :: msg
5679 
5680 !************************************************************************
5681 
5682  do_open = PRESENT(fname)
5683  if (do_open) then
 5684    ABI_CHECK(.not.PRESENT(mpi_fh), "mpi_fh should not be present when fname is used")
5685  else
5686    ABI_CHECK(PRESENT(mpi_fh), "mpi_fh should be present")
5687  end if
5688 
5689  my_offset=0; if (PRESENT(offset)) my_offset=offset
5690 
5691  ABI_CHECK(allocated(Slk_mat%buffer_cplx), "%buffer_cplx not allocated")
5692  if (firstchar(uplo, ["U","L"]) .and. Slk_mat%sizeb_global(1) /= Slk_mat%sizeb_global(2) ) then
5693    ABI_ERROR("rectangular matrices are not compatible with the specified uplo")
5694  end if
5695 
5696  nrows_glob = Slk_mat%sizeb_global(1)
5697 
5698  buffer_size= PRODUCT(Slk_mat%sizeb_local(1:2))
5699 
5700  call wrtout(std_out, "slk_read: Using MPI-IO")
5701 
5702  comm = Slk_mat%processor%comm
5703 
5704  if (do_open) then ! Open the file.
5705    my_flags=MPI_MODE_RDONLY; if (PRESENT(flags)) my_flags = flags
5706    call MPI_FILE_OPEN(comm, fname, my_flags, MPI_INFO_NULL, my_fh, ierr)
5707    ABI_CHECK_MPI(ierr,"FILE_OPEN "//TRIM(fname))
5708  else
5709    my_fh = mpi_fh
5710  end if
5711 
5712  call slk_single_fview_read(Slk_mat,uplo,etype,slk_type,offset_err,is_fortran_file=is_fortran_file)
5713 
5714  if (offset_err/=0) then
5715    write(msg,"(3a)")&
5716     "Global position index cannot be stored in standard Fortran integer ",ch10,&
5717     "scaLAPACK matrix cannot be read with a single MPI-IO call."
5718    ABI_ERROR(msg)
5719  end if
5720 
5721  call MPI_FILE_SET_VIEW(my_fh, my_offset, etype, slk_type, 'native', MPI_INFO_NULL, ierr)
5722  ABI_CHECK_MPI(ierr,"SET_VIEW")
5723 
5724  call MPI_FILE_READ_ALL(my_fh, Slk_mat%buffer_cplx, buffer_size, MPI_DOUBLE_complex, MPI_STATUS_IGNORE, ierr)
5725  ABI_CHECK_MPI(ierr,"READ_ALL")
5726 
5727  ! Symmetrize local buffer if uplo /= "All"
5728  call slk_symmetrize(Slk_mat, uplo, symtype)
5729 
5730 !BEGINDEBUG
5731 !call MPI_FILE_READ_AT(mpi_fh,my_offset+xmpio_bsize_frm,ctest,1,MPI_DOUBLE_complex,MPI_STATUS_IGNORE,ierr)
5732 !write(std_out,*)"ctest",ctest
5733 !call MPI_FILE_READ_AT(mpi_fh,my_offset+2*xmpio_bsize_frm,ctest,1,MPI_DOUBLE_complex,MPI_STATUS_IGNORE,ierr)
5734 !write(std_out,*)"ctest",ctest
5735 !ENDDEBUG
5736 
5737  !call print_arr(Slk_mat%buffer_cplx,max_r=10,max_c=10,unit=std_out)
5738  !
5739  ! Close the file and release the MPI filetype.
5740  call MPI_type_FREE(slk_type,ierr)
5741  ABI_CHECK_MPI(ierr,"MPI_type_FREE")
5742 
5743  call slk_mat%bsize_and_type(bsize_elm, mpi_type_elm)
5744 
 5745 !A personal (single-rank) call seems to make the code hang, hence all ranks check the markers.
 5746 !if (is_fortran_file .and. check_frm .and. Slk_mat%Processor%myproc==0) then ! Master checks the Fortran markers.
 5747  if (is_fortran_file .and. check_frm) then ! All ranks check the Fortran record markers.
5748    call wrtout(std_out,"Checking Fortran record markers...", do_flush=.True.)
5749    nfrec = Slk_mat%sizeb_global(2)
5750    ABI_MALLOC(bsize_frecord,(nfrec))
5751    if (firstchar(uplo, ["A"])) then
5752      bsize_frecord = Slk_mat%sizeb_global(1) * bsize_elm
5753    else if (firstchar(uplo, ["U"])) then
5754      bsize_frecord = (/(col_glob * bsize_elm, col_glob=1,nfrec)/)
5755    else if (firstchar(uplo, ["L"])) then
5756      bsize_frecord = (/(col_glob * bsize_elm, col_glob=nfrec,1,-1)/)
5757    else
5758      ABI_ERROR("Wrong uplo")
5759    end if
5760    call xmpio_check_frmarkers(my_fh,my_offset,xmpio_collective,nfrec,bsize_frecord,ierr)
5761    ABI_CHECK(ierr==0,"Wrong Fortran record markers")
5762    ABI_FREE(bsize_frecord)
5763  end if
5764 
5765  if (do_open) then ! Close the file.
5766    call MPI_FILE_CLOSE(my_fh, ierr)
5767    ABI_CHECK_MPI(ierr,"FILE_CLOSE")
5768  end if
5769 
5770 !Increment the offset
5771  if (PRESENT(offset)) then
5772    if (firstchar(uplo, ["A"])) then
5773      offset = offset + PRODUCT(Slk_mat%sizeb_global(1:2)) * bsize_elm
5774      if (is_fortran_file) offset = offset + Slk_mat%sizeb_global(2)*2*xmpio_bsize_frm
5775    else if (firstchar(uplo, ["U","L"])) then
 5776      offset = offset + ((Slk_mat%sizeb_global(2) * (Slk_mat%sizeb_global(2)+1)) / 2) * bsize_elm
5777      if (is_fortran_file) offset = offset + Slk_mat%sizeb_global(2)*2*xmpio_bsize_frm
5778    else
5779      ABI_ERROR("Wrong uplo")
5780    end if
5781  end if
5782 
5783  call xmpi_barrier(comm)
5784  RETURN
5785 
5786 #else
5787  ABI_ERROR("MPI-IO support not enabled")
5788 #endif
5789 
5790 end subroutine slk_read
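
 A hedged usage sketch (the file name is illustrative): read a Hermitian matrix whose
 upper triangle was written as Fortran records, keeping track of the offset for
 subsequent reads from the same file.

    type(matrix_scalapack) :: slk_mat
    integer(XMPI_OFFSET_KIND) :: offset

    ! slk_mat must be initialized (local buffer allocated) before the call.
    offset = 0
    call slk_read(slk_mat, "U", "H", .TRUE., fname="SCR_matrix.dat", offset=offset)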

m_slk/slk_set_head_and_wings [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_set_head_and_wings

FUNCTION

  Set the head and the wings of the matrix starting from global arrays.

SOURCE

940 subroutine slk_set_head_and_wings(mat, head, low_wing, up_wing)
941 
942 !Arguments ------------------------------------
943  class(matrix_scalapack),intent(inout) :: mat
944  complex(dp),intent(in) :: head, low_wing(mat%sizeb_global(1)), up_wing(mat%sizeb_global(2))
945 
946 !Local variables-------------------------------
947  integer :: il_g1, il_g2, iglob1, iglob2
948  logical :: is_cplx
949 
950 ! *********************************************************************
951 
952  is_cplx = allocated(mat%buffer_cplx)
953 
954  do il_g2=1,mat%sizeb_local(2)
955    iglob2 = mat%loc2gcol(il_g2)
956    do il_g1=1,mat%sizeb_local(1)
957      iglob1 = mat%loc2grow(il_g1)
958 
959      if (iglob1 == 1 .or. iglob2 == 1) then
960        if (iglob1 == 1 .and. iglob2 == 1) then
961          if (is_cplx) then
962            mat%buffer_cplx(il_g1, il_g2) = head
963          else
964            mat%buffer_real(il_g1, il_g2) = real(head)
965          end if
966        else if (iglob1 == 1) then
967          if (is_cplx) then
968            mat%buffer_cplx(il_g1, il_g2) = up_wing(iglob2)
969          else
970            mat%buffer_real(il_g1, il_g2) = real(up_wing(iglob2))
971          end if
972        else if (iglob2 == 1) then
973          if (is_cplx) then
974            mat%buffer_cplx(il_g1, il_g2) = low_wing(iglob1)
975          else
976            mat%buffer_real(il_g1, il_g2) = real(low_wing(iglob1))
977          end if
978        end if
979      end if
980 
981    end do
982  end do
983 
984 end subroutine slk_set_head_and_wings
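
 A hedged sketch, e.g. for a dielectric-like matrix whose (1,1) element (head) and
 first row/column (wings) are computed separately. The head and wing arrays are
 assumed to be available on all processes of the grid.

    complex(dp) :: head
    complex(dp) :: low_wing(mat%sizeb_global(1)), up_wing(mat%sizeb_global(2))

    ! ... compute head, low_wing and up_wing ...
    ! Overwrite the first row and column of the distributed matrix.
    call slk_set_head_and_wings(mat, head, low_wing, up_wing)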

m_slk/slk_set_imag_diago_to_zero [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_set_imag_diago_to_zero

FUNCTION

  Set the imaginary part of the diagonal to zero.
  Return in local_max the maximum imaginary part found on the diagonal elements of the local buffer.
  No MPI communication is performed inside the routine; client code can easily reduce
  local_max within the PBLAS communicator if needed.

INPUTS

  mat=The distributed matrix whose diagonal is modified in place.

OUTPUT

  local_max=The maximum imaginary part found on the locally stored diagonal elements
    (-huge(one) if this node stores no diagonal element or if the buffer is real).

SOURCE

5344 subroutine slk_set_imag_diago_to_zero(mat, local_max)
5345 
5346 !Arguments ------------------------------------
5347  class(basemat_t), intent(inout) :: mat
5348  real(dp),intent(out) :: local_max
5349 
5350 !Local variables-------------------------------
5351  integer :: il1, iglob1, il2, iglob2
5352 
5353 ! *************************************************************************
5354 
5355  local_max = -huge(one)
5356 
5357  select type (mat)
5358  class is (matrix_scalapack)
5359    if (allocated(mat%buffer_real)) return
5360    do il2=1,mat%sizeb_local(2)
5361      iglob2 = mat%loc2gcol(il2)
5362      do il1=1,mat%sizeb_local(1)
5363        iglob1 = mat%loc2grow(il1)
5364        if (iglob1 == iglob2) then
5365          local_max = max(local_max, aimag(mat%buffer_cplx(il1, il2)))
5366          mat%buffer_cplx(il1, il2) = real(mat%buffer_cplx(il1, il2))
5367        end if
5368      end do
5369    end do
5370 
5371  class is (slkmat_sp_t)
5372    if (allocated(mat%buffer_real)) return
5373    do il2=1,mat%sizeb_local(2)
5374      iglob2 = mat%loc2gcol(il2)
5375      do il1=1,mat%sizeb_local(1)
5376        iglob1 = mat%loc2grow(il1)
5377        if (iglob1 == iglob2) then
5378          local_max = max(local_max, aimag(mat%buffer_cplx(il1, il2)))
5379          mat%buffer_cplx(il1, il2) = real(mat%buffer_cplx(il1, il2))
5380        end if
5381      end do
5382    end do
5383  class default
5384    ABI_ERROR("Wrong class")
5385  end select
5386 
5387 
5388 end subroutine slk_set_imag_diago_to_zero
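
 A hedged sketch of the reduction mentioned above, assuming the xmpi_max helper
 from m_xmpi with (sendval, recvval, comm, ierr) arguments:

    real(dp) :: local_max, glob_max
    integer :: ierr

    call mat%set_imag_diago_to_zero(local_max)
    ! Reduce within the PBLAS communicator to get the global maximum.
    call xmpi_max(local_max, glob_max, mat%processor%comm, ierr)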

m_slk/slk_single_fview_read [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_single_fview_read

FUNCTION

  Return an MPI datatype that can be used to read a scaLAPACK distributed matrix from
  a binary file using MPI-IO.

INPUTS

  Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer.
  uplo=String specifying whether only the upper or lower triangular part of the global matrix is stored on disk:
    = "U":  Upper triangular is stored
    = "L":  Lower triangular is stored
    = "A":  Full matrix (used for general complex matrices)
  [is_fortran_file]=.FALSE. if a C stream is used, .TRUE. for Fortran binary files
    with record markers. In this case etype is set to xmpio_mpi_type_frm provided that
    the mpi_type of the matrix element is commensurate with xmpio_mpi_type_frm. Defaults to .TRUE.

OUTPUT

  etype=Elementary data type (handle) defining the elementary unit used to access the file.
  slk_type=New MPI type that can be used to instantiate the MPI-IO view for the Fortran file.
     Note that the view assumes that the file pointer points to the FIRST Fortran record marker.
  offset_err=Error code. A non-zero value signals that the global matrix is too large
    for a single MPI-IO access (see notes below).

NOTES

  With (signed) Fortran integers, the maximum size of the file that
  can be read in one shot is around 2Gb when etype is set to byte.
  Using a larger etype might create portability problems (real data on machines using
  integer*16 for the marker) since etype must be a multiple of the Fortran record marker.
  For the above reason, block_displ is given in bytes and must be stored in an integer
  of kind XMPI_ADDRESS_KIND. If the displacement is too large, the routine returns
  offset_err=1 so that the caller will know that several MPI-IO reads are needed to
  read the local buffer.

SOURCE

6114 subroutine slk_single_fview_read(Slk_mat,uplo,etype,slk_type,offset_err,is_fortran_file)
6115 
6116 !Arguments ------------------------------------
6117 !scalars
6118  integer,intent(out) :: offset_err,slk_type,etype
6119  character(len=*),intent(in) :: uplo
6120  logical,optional,intent(in) :: is_fortran_file
6121  class(matrix_scalapack),intent(in) :: Slk_mat
6122 
6123 !Local variables ------------------------------
6124 !scalars
6125  integer :: jloc,iloc,iglob,jglob,nrows_glob,ncols_glob,mpi_err,nel
6126  integer :: bsize_frm,mpi_type_elm,ij_loc,bsize_etype,bsize_elm
6127  integer(XMPI_OFFSET_KIND) :: ijp_glob,my_offset,cpad_frm
6128 !arrays
6129  character(len=500) :: msg
6130  integer,allocatable :: block_length(:),block_type(:)
6131  integer(XMPI_ADDRESS_KIND),allocatable :: block_displ(:)
6132 
6133 !************************************************************************
6134 
6135 #ifdef HAVE_MPI_IO
6136 !@matrix_scalapack
6137  bsize_frm = xmpio_bsize_frm    ! Byte size of the Fortran record marker.
6138  if (PRESENT(is_fortran_file)) then
6139    if (.not.is_fortran_file) bsize_frm = 0
6140  end if
6141 
6142  call slk_mat%bsize_and_type(bsize_elm, mpi_type_elm)
6143 
6144  ! Global dimensions.
6145  nrows_glob=Slk_mat%sizeb_global(1)
6146  ncols_glob=Slk_mat%sizeb_global(2)
6147 
6148  ! Number of matrix elements treated by this node.
6149  nel = PRODUCT(Slk_mat%sizeb_local(1:2))
6150 
6151  !Cannot use MPI_type_CREATE_INDEXED_BLOCK since it is not correctly implemented in several MPI libraries.
 6152 !etype has to be set to MPI_BYTE, since displacements in MPI structures are always given in bytes.
6153  ! ABI_WARNING("Using MPI_type_STRUCT for the MPI-IO file view")
6154 
6155  etype = MPI_BYTE
6156  call MPI_type_SIZE(etype,bsize_etype,mpi_err)
6157 
6158  ! Define the mapping between scaLAPACK buffer and the storage on file.
6159  ABI_MALLOC(block_length, (nel+2))
6160  ABI_MALLOC(block_displ, (nel+2))
6161  ABI_MALLOC(block_type, (nel+2))
6162  block_length(1)=1
6163  block_displ (1)=0
6164  block_type  (1)=MPI_LB
6165 
6166  ! Note that the view assumes that the file pointer points to the first Fortran record marker.
6167  offset_err=0
6168  select case (uplo(1:1))
 6169  case ("A","a") ! The entire global matrix is stored on disk. TODO can use contiguous vectors for better access.
6170    ij_loc=0
6171    do jloc=1,Slk_mat%sizeb_local(2)
6172      do iloc=1,Slk_mat%sizeb_local(1)
6173        call slk_mat%loc2glob(iloc, jloc, iglob, jglob)
6174        ij_loc  = ij_loc+1
6175        my_offset = 2*(jglob-1)*bsize_frm + bsize_frm + (jglob-1)*nrows_glob*bsize_elm + (iglob-1) * bsize_elm
6176        my_offset = my_offset / bsize_etype
6177        if (xmpio_max_address(my_offset)) offset_err=1   ! Test for possible wraparounds
6178        block_displ (ij_loc+1) = my_offset
6179        block_type  (ij_loc+1) = mpi_type_elm
6180        block_length(ij_loc+1) = 1
6181      end do
6182    end do
6183 
6184  case ("U","u") ! Only the upper triangle of the global matrix is stored on disk.
6185    ij_loc=0
6186    do jloc=1,Slk_mat%sizeb_local(2)
6187      do iloc=1,Slk_mat%sizeb_local(1)
6188        call slk_mat%loc2glob(iloc, jloc, iglob, jglob)
6189        if (jglob>=iglob) then
6190          ijp_glob = iglob + jglob*(jglob-1)/2  ! Index for packed form
6191          cpad_frm = 2*(jglob-1)*bsize_frm
6192        else
6193          ijp_glob = jglob + iglob*(iglob-1)/2  ! Index for packed form
6194          cpad_frm = 2*(iglob-1)*bsize_frm
6195        end if
6196        ij_loc = ij_loc+1
6197        my_offset = cpad_frm + bsize_frm + (ijp_glob-1) * bsize_elm
6198        my_offset = my_offset / bsize_etype
6199        if (xmpio_max_address(my_offset)) offset_err=1  ! Test for possible wraparounds
6200        block_displ (ij_loc+1) = my_offset
6201        block_type  (ij_loc+1) = mpi_type_elm
6202        block_length(ij_loc+1) = 1
6203      end do
6204    end do
6205 
6206  case ("L","l") ! Only the lower triangle of the global matrix is stored on disk.
6207    ij_loc=0
6208    do jloc=1,Slk_mat%sizeb_local(2)
6209      do iloc=1,Slk_mat%sizeb_local(1)
6210        call slk_mat%loc2glob(iloc, jloc, iglob, jglob)
6211        if (jglob<=iglob) then
6212          ijp_glob = iglob + (jglob-1)*(2*nrows_glob-jglob)/2 ! Index for packed form
6213          cpad_frm = 2*(jglob-1)*bsize_frm
6214        else
6215          ijp_glob = jglob + (iglob-1)*(2*nrows_glob-iglob)/2 ! Index for packed form
6216          cpad_frm = 2*(iglob-1)*bsize_frm
6217        end if
6218        ij_loc = ij_loc+1
6219        my_offset = cpad_frm + bsize_frm + (ijp_glob-1) * bsize_elm
6220        my_offset = my_offset / bsize_etype
6221        if (xmpio_max_address(my_offset)) offset_err=1   ! block_displ is usually integer*4. Test for possible wraparounds
6222        block_displ  (ij_loc+1) = my_offset
6223        block_type  (ij_loc+1) = mpi_type_elm
6224        block_length(ij_loc+1) = 1
6225      end do
6226    end do
6227 
6228    if (offset_err/=0) then  ! just warn, let the caller handle the exception.
6229      write(msg,"(3a)")&
6230       " Global position index cannot be stored in standard Fortran integer ",ch10,&
6231       " scaLAPACK matrix cannot be read with a single MPI-IO call ."
6232      ABI_WARNING(msg)
6233    end if
6234 
6235  case default
6236    ABI_BUG(" Wrong uplo: "//TRIM(uplo))
6237  end select
6238 
6239  block_length(nel+2)= 1
6240  block_displ (nel+2)= ncols_glob * (nrows_glob*bsize_elm + 2*bsize_frm) / bsize_etype
6241  block_type  (nel+2)= MPI_UB
6242 
6243  call xmpio_type_struct(nel+2,block_length,block_displ,block_type,slk_type,mpi_err)
6244  ABI_CHECK_MPI(mpi_err,"MPI_type_STRUCT")
6245 
6246  ABI_FREE(block_length)
6247  ABI_FREE(block_displ)
6248  ABI_FREE(block_type)
6249 
6250  call MPI_type_COMMIT(slk_type,mpi_err)
6251  ABI_CHECK_MPI(mpi_err,"MPI_type_COMMIT")
6252 
6253 #else
 6254  ABI_ERROR("MPI-IO is mandatory in slk_single_fview_read")
6255 #endif
6256 
6257 end subroutine slk_single_fview_read
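
 The packed indices used in the "U" and "L" branches above are the standard
 column-major packed-triangle maps. A tiny self-contained check (illustrative only):

    program packed_index_demo
      implicit none
      integer :: i, j
      integer, parameter :: n = 4
      ! Upper triangle, packed column by column: ijp = i + j*(j-1)/2
      do j=1,n
        do i=1,j
          write(*,'(a,2i3,a,i4)') " (i,j) =", i, j, " -> ijp =", i + j*(j-1)/2
        end do
      end do
      ! Lower triangle, packed column by column: ijp = i + (j-1)*(2*n-j)/2
      do j=1,n
        do i=j,n
          write(*,'(a,2i3,a,i4)') " (i,j) =", i, j, " -> ijp =", i + (j-1)*(2*n-j)/2
        end do
      end do
    end program packed_index_demo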

m_slk/slk_single_fview_read_mask [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_single_fview_read_mask

FUNCTION

  Return an MPI datatype that can be used to read a scaLAPACK distributed matrix from
  a binary file using MPI-IO. The view is created using the user-defined mask function
  mask_of_glob. The storage of the data on file is described via the user-defined function offset_of_glob.

INPUTS

  Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK matrix.
  mask_of_glob(row_glob,col_glob,size_glob)=Integer function that receives the global indices
     of a matrix element; size_glob(1:2) are the global dimensions.
     It returns 0 if (row_glob,col_glob) should not be read.
  offset_of_glob(row_glob,col_glob,size_glob,nsblocks,sub_block,bsize_elm,bsize_frm)=Function
     returning the file offset (in bytes) of the element with global indices (row_glob,col_glob).
  nsblocks=Number of sub-blocks (will be passed to offset_of_glob)
  sub_block(2,2,nsblocks)=Global coordinates of the extremal points delimiting the sub-blocks,
   e.g. sub_block(:,1,1) gives the coordinates of the upper-left corner of the first block,
        sub_block(:,2,1) gives the coordinates of the lower-right corner of the first block.
  [is_fortran_file]=.FALSE. if a C stream is used. Set to .TRUE. for Fortran binary files
    with record markers.

OUTPUT

  my_nel=Number of elements that will be read by this node.
  etype=Elementary data type (handle) defining the elementary unit used to access the file.
    This is the elementary type that must be used to create the view (MPI_BYTE is used).
  slk_type=New MPI type that can be used to instantiate the MPI-IO view for the Fortran file.
     Note that the view assumes that the file pointer points to the FIRST Fortran record marker.
  offset_err=Error code. A returned non-zero value signals that the global matrix is too large
    for a single MPI-IO access. See notes in other slk_single_fview_* routines.

SIDE EFFECTS

  myel2loc(:,:)
    input: pointer to NULL
    output: myel2loc(2,my_nel):  myel2loc(:,el) gives (iloc,jloc) for el=1,my_nel.

SOURCE

5833 subroutine slk_single_fview_read_mask(Slk_mat,mask_of_glob,offset_of_glob,nsblocks,sub_block,&
5834                                       my_nel,myel2loc,etype,slk_type,offset_err,is_fortran_file)
5835 
5836 !Arguments ------------------------------------
5837 !scalars
5838  integer,intent(in) :: nsblocks
5839  integer,intent(out) :: my_nel,offset_err,slk_type,etype
5840  logical,optional,intent(in) :: is_fortran_file
5841  class(matrix_scalapack),intent(in) :: Slk_mat
5842 !arrays
5843  integer,intent(in) :: sub_block(2,2,nsblocks)
5844  integer,pointer :: myel2loc(:,:)
5845 
5846  interface
5847    function mask_of_glob(row_glob,col_glob,size_glob)
5848      use defs_basis
5849      integer :: mask_of_glob
5850      integer,intent(in) :: row_glob,col_glob
5851      integer,intent(in) :: size_glob(2)
5852    end function mask_of_glob
5853  end interface
5854 
5855  interface
5856    function offset_of_glob(row_glob,col_glob,size_glob,nsblocks,sub_block,bsize_elm,bsize_frm)
5857      use defs_basis
5858      use m_xmpi
5859      integer(XMPI_OFFSET_KIND) :: offset_of_glob
5860      integer,intent(in) :: row_glob,col_glob,bsize_elm,bsize_frm,nsblocks
5861      integer,intent(in) :: size_glob(2),sub_block(2,2,nsblocks)
5862    end function offset_of_glob
5863  end interface
5864 
5865 !Local variables ------------------------------
5866 !scalars
5867  integer :: el,jloc,iloc,iglob,jglob,mpi_err,sweep
5868  integer :: bsize_frm,mpi_type_elm,bsize_elm
5869  integer(XMPI_OFFSET_KIND) :: tmp_off,max_displ
5870 !arrays
5871  character(len=500) :: msg
5872  integer,allocatable :: block_length(:),block_type(:)
5873  integer(XMPI_ADDRESS_KIND),allocatable :: block_displ(:)
5874 
5875 !************************************************************************
5876 
5877 #ifdef HAVE_MPI_IO
5878  bsize_frm = xmpio_bsize_frm  ! Byte size of the Fortran record marker.
5879  if (PRESENT(is_fortran_file)) then
5880    if (.not.is_fortran_file) bsize_frm = 0
5881  end if
5882 
5883  ! Byte size of the matrix element.
5884  call slk_mat%bsize_and_type(bsize_elm, mpi_type_elm)
5885 
5886  ! Find the number of local matrix elements to be read, then create the table myel2loc.
5887  do sweep=1,2
5888    if (sweep==2) then
5889       ABI_MALLOC(myel2loc,(2,my_nel))
5890    end if
5891    my_nel=0
5892 
5893    do jloc=1,Slk_mat%sizeb_local(2)
5894      do iloc=1,Slk_mat%sizeb_local(1)
5895        call slk_mat%loc2glob(iloc, jloc, iglob, jglob)
5896        if ( mask_of_glob(iglob,jglob,Slk_mat%sizeb_global)/= 0) then ! Will fill this entry.
5897          my_nel  = my_nel+1
5898          if (sweep==2) myel2loc(:,my_nel) = (/iloc,jloc/)
5899        end if
5900      end do
5901    end do
5902  end do
5903 
5904  etype = MPI_BYTE
5905 
5906  ! Define the mapping between scaLAPACK buffer and the storage on file.
5907  ! Note that the view assumes that the file pointer points to the first Fortran record marker.
5908  ABI_MALLOC(block_length,(my_nel+2))
5909  ABI_MALLOC(block_displ,(my_nel+2))
5910  ABI_MALLOC(block_type,(my_nel+2))
5911  block_length(1)=1
5912  block_displ (1)=0
5913  block_type  (1)=MPI_LB
5914 
5915  offset_err=0; max_displ=0
5916  do el=1,my_nel
5917    iloc = myel2loc(1,el)
5918    jloc = myel2loc(2,el)
5919    call slk_mat%loc2glob(iloc, jloc, iglob, jglob)
5920    tmp_off = offset_of_glob(iglob,jglob,Slk_mat%sizeb_global,nsblocks,sub_block,bsize_elm,bsize_frm)
5921    if (xmpio_max_address(tmp_off)) offset_err=1   ! Test for possible wraparounds.
5922    max_displ = MAX(max_displ,tmp_off)
5923    block_displ (el+1) = tmp_off
5924    block_type  (el+1) = mpi_type_elm
5925    block_length(el+1) = 1
5926    !write(std_out,*)" iglob, jglob, tmp_off ",iglob, jglob, tmp_off
5927  end do
5928  !write(std_out,*)" MAX displ is ",MAXVAL(block_displ)
5929 
5930  if (offset_err/=0) then  ! just warn, let the caller handle the exception.
5931    write(msg,"(3a)")&
5932     " Global position index cannot be stored in standard Fortran integer ",ch10,&
5933     " scaLAPACK matrix cannot be read with a single MPI-IO call ."
5934    ABI_WARNING(msg)
5935  end if
5936 
5937  block_length(my_nel+2) = 1
5938  block_displ (my_nel+2) = max_displ
5939  block_type  (my_nel+2) = MPI_UB
5940 
5941  call xmpio_type_struct(my_nel+2,block_length,block_displ,block_type,slk_type,mpi_err)
5942  ABI_CHECK_MPI(mpi_err,"MPI_type_STRUCT")
5943 
5944  ABI_FREE(block_length)
5945  ABI_FREE(block_displ)
5946  ABI_FREE(block_type)
5947 
5948  call MPI_type_COMMIT(slk_type,mpi_err)
5949  ABI_CHECK_MPI(mpi_err,"MPI_type_COMMIT")
5950 
5951 #else
5952  ABI_ERROR("MPI-IO is mandatory in slk_single_fview_read_mask")
5953 #endif
5954 
5955 end subroutine slk_single_fview_read_mask

m_slk/slk_single_fview_write [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_single_fview_write

FUNCTION

  Returns an MPI datatype that can be used to write a scaLAPACK distributed matrix to
  a binary file using MPI-IO.

INPUTS

  Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer.
  uplo=String specifying whether only the upper or lower triangular part of the global matrix is stored on disk:
    = "U":  Upper triangular is stored
    = "L":  Lower triangular is stored
    = "A":  Full matrix (used for general complex matrices)
  [is_fortran_file]=.FALSE. if a C stream is used; .TRUE. for writing Fortran binary files
    with record markers. In this case etype is set to xmpio_mpi_type_frm provided that
    the mpi_type of the matrix element is commensurate with xmpio_mpi_type_frm. Defaults to .TRUE.
  glob_subarray(2,2) = Used to select the subarray of the global matrix. Used only when uplo="All"
     glob_subarray(:,1)=starting global coordinates of the subarray in each dimension
     (array of nonnegative integers >=1, <=array_of_sizes)
     glob_subarray(:,2)=Number of elements in each dimension of the subarray (array of positive integers)

OUTPUT

  nelw=Number of elements to be written.
  etype=Elementary data type (handle) defining the elementary unit used to access the file.
  slk_type=New MPI type that can be used to instantiate the MPI-IO view for the Fortran file.
     Note that the view assumes that the file pointer points to the FIRST Fortran record marker.
  offset_err=Error code. A non-zero value signals that the global matrix is too large
    for a single MPI-IO access (see notes below).

SIDE EFFECTS

  elw2slk(:,:) =
    input:  pointer to null().
    output: elw2slk(2,nelw) contains the local coordinates of the matrix elements to be written.
     (useful only if the upper or lower triangle of the global matrix has to be written, or when
      uplo="All" but only a global subarray is written).

NOTES

  With (signed) Fortran integers, the maximum size of the file that
  can be accessed in one shot is around 2Gb when etype is set to byte.
  Using a larger etype might create portability problems (real data on machines using
  integer*16 for the marker) since etype must be a multiple of the Fortran record marker.
  For this reason, block_displ is given in bytes and must be stored in a Fortran
  integer of kind XMPI_ADDRESS_KIND. If the displacement is too large, the routine returns
  offset_err=1 so that the caller will know that several MPI-IO calls are needed to
  write the local buffer.
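
  To make the packed-storage mapping concrete, here is a minimal sketch of the offset computed
  below for uplo="U" (scalar names hypothetical; bsize_frm and bsize_elm as in the routine):

    ijp_glob  = iglob + jglob*(jglob-1)/2     ! column-major packed index of (iglob,jglob)
    cpad_frm  = 2*(jglob-1)*bsize_frm         ! record markers of the previous columns
    my_offset = cpad_frm + bsize_frm + (ijp_glob-1)*bsize_elm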

SOURCE

6311 subroutine slk_single_fview_write(Slk_mat,uplo,nelw,elw2slk,etype,slk_type,offset_err,is_fortran_file,glob_subarray)
6312 
6313 !Arguments ------------------------------------
6314 !scalars
6315  class(matrix_scalapack),intent(in) :: Slk_mat
6316  integer,intent(out) :: offset_err,slk_type,etype,nelw
6317  character(len=*),intent(in) :: uplo
6318  logical,optional,intent(in) :: is_fortran_file
6319 !arrays
6320  integer,pointer :: elw2slk(:,:)
6321  integer,optional,intent(in) :: glob_subarray(2,2)
6322 
6323 !Local variables ------------------------------
6324 !scalars
6325  integer :: jloc,iloc,iglob,jglob,nrows_glob,ncols_glob,mpi_err,nel_max
6326  integer :: grow_min,grow_max,gcol_min,gcol_max
6327  integer :: bsize_frm,mpi_type_elm,ij_loc,bsize_elm
6328  integer(XMPI_OFFSET_KIND) :: ijp_glob,my_offset,cpad_frm
6329 !arrays
6330  character(len=500) :: msg
6331  integer,allocatable :: block_length(:),block_type(:)
6332  integer(XMPI_ADDRESS_KIND),allocatable :: block_displ(:)
6333 
6334 !************************************************************************
6335 
6336 #ifdef HAVE_MPI_IO
6337 !@matrix_scalapack
6338  bsize_frm = xmpio_bsize_frm    ! Byte size of the Fortran record marker.
6339  if (PRESENT(is_fortran_file)) then
6340    if (.not.is_fortran_file) bsize_frm = 0
6341  end if
6342 
6343  if (PRESENT(glob_subarray).and..not.firstchar(uplo, ["A"])) then
6344    ABI_ERROR("glob_subarray should not be used when uplo/=All")
6345  end if
6346 
6347  call slk_mat%bsize_and_type(bsize_elm, mpi_type_elm)
6348 
6349  ! Global dimensions.
6350  nrows_glob=Slk_mat%sizeb_global(1)
6351  ncols_glob=Slk_mat%sizeb_global(2)
6352 
6353  ! Number of matrix elements treated by this node.
6354  nel_max = PRODUCT(Slk_mat%sizeb_local(1:2))
6355 
6356  ABI_MALLOC(elw2slk,(2,nel_max))
6357  elw2slk=0
6358 
6359  ! Cannot use MPI_type_CREATE_INDEXED_BLOCK since it is not correctly implemented in several MPI libraries.
6360  ! etype has to be set to MPI_BYTE, since the displacement in MPI structures is always in byte.
6361  etype = MPI_BYTE
6362 
6363  ! Define the mapping between scaLAPACK buffer and the storage on file.
6364  ABI_MALLOC(block_length, (nel_max+2))
6365  ABI_MALLOC(block_displ, (nel_max+2))
6366  ABI_MALLOC(block_type, (nel_max+2))
6367  block_length(1)=1
6368  block_displ (1)=0
6369  block_type  (1)=MPI_LB
6370 
6371  ! Note that the view assumes that the file pointer points to the first Fortran record marker.
6372  offset_err=0
6373 
6374  select case (uplo(1:1))
6375  case ("A","a")
6376    ! The entire global matrix is written on disk. TODO can use contiguous vectors for better access.
6377    grow_min=1; grow_max=nrows_glob
6378    gcol_min=1; gcol_max=ncols_glob
6379    if (PRESENT(glob_subarray)) then ! subarray access.
6380      grow_min = glob_subarray(1,1)
6381      gcol_min = glob_subarray(2,1)
6382      grow_max = grow_min + glob_subarray(1,2) -1
6383      gcol_max = gcol_min + glob_subarray(2,2) -1
6384    end if
6385 
6386    ij_loc=0
6387    do jloc=1,Slk_mat%sizeb_local(2)
6388      do iloc=1,Slk_mat%sizeb_local(1)
6389        call slk_mat%loc2glob(iloc, jloc, iglob, jglob)
6390        if (iglob>=grow_min.and.iglob<=grow_max .and. &  ! glob_subarray element.
6391            jglob>=gcol_min.and.jglob<=gcol_max) then
6392          ij_loc  = ij_loc+1
6393          my_offset = 2*(jglob-1)*bsize_frm + bsize_frm + (jglob-1)*nrows_glob*bsize_elm + (iglob-1) * bsize_elm
6394          if (xmpio_max_address(my_offset)) offset_err=1   ! Test for possible wraparounds
6395          block_displ (ij_loc+1) = my_offset
6396          block_type  (ij_loc+1) = mpi_type_elm
6397          block_length(ij_loc+1) = 1
6398          elw2slk(:,ij_loc) = (/iloc,jloc/) ! useless when subarray are not used but oh well!
6399        end if
6400      end do
6401    end do
6402 
6403  case ("U","u")
6404    ! Only the upper triangle of the global matrix is stored on disk.
6405    ij_loc=0
6406    do jloc=1,Slk_mat%sizeb_local(2)
6407      do iloc=1,Slk_mat%sizeb_local(1)
6408        call slk_mat%loc2glob(iloc, jloc, iglob, jglob)
6409        if (jglob>=iglob) then
6410          ijp_glob = iglob + jglob*(jglob-1)/2  ! Index for packed form
6411          cpad_frm = 2*(jglob-1)*bsize_frm
6412          ij_loc = ij_loc+1
6413          my_offset = cpad_frm + bsize_frm + (ijp_glob-1) * bsize_elm
6414          if (xmpio_max_address(my_offset)) offset_err=1   ! Test for possible wraparounds
6415          block_displ (ij_loc+1) = my_offset
6416          block_type  (ij_loc+1) = mpi_type_elm
6417          block_length(ij_loc+1) = 1
6418          elw2slk(:,ij_loc) = (/iloc,jloc/)
6419        end if
6420      end do
6421    end do
6422 
6423  case ("L","l")
6424    ! Only the lower triangle of the global matrix is stored on disk.
6425    ij_loc=0
6426    do jloc=1,Slk_mat%sizeb_local(2)
6427      do iloc=1,Slk_mat%sizeb_local(1)
6428        call slk_mat%loc2glob(iloc, jloc, iglob, jglob)
6429        if (jglob<=iglob) then
6430          ijp_glob = iglob + (jglob-1)*(2*nrows_glob-jglob)/2 ! Index for packed form
6431          cpad_frm = 2*(jglob-1)*bsize_frm
6432          ij_loc = ij_loc+1
6433          my_offset = cpad_frm + bsize_frm + (ijp_glob-1) * bsize_elm
6434          if (xmpio_max_address(my_offset)) offset_err=1   ! block_displ is usually integer*4. Test for possible wraparounds
6435          block_displ (ij_loc+1) = my_offset
6436          block_type  (ij_loc+1) = mpi_type_elm
6437          block_length(ij_loc+1) = 1
6438          elw2slk(:,ij_loc) = (/iloc,jloc/)
6439        end if
6440      end do
6441    end do
6442 
6443  case default
6444    ABI_BUG(" Wrong uplo: "//TRIM(uplo))
6445  end select
6446 
6447  if (offset_err/=0) then  ! just warn, let the caller handle the exception.
6448    write(msg,"(3a)")&
6449     "Global position index cannot be stored in standard Fortran integer ",ch10,&
6450     "scaLAPACK matrix cannot be written with a single MPI-IO call."
6451    ABI_WARNING(msg)
6452  end if
6453 
6454  ! Final number of matrix elements that will be written by this node.
6455  nelw = ij_loc
6456 
6457  block_length(nelw+2)= 1
6458  block_displ (nelw+2)= ncols_glob * (nrows_glob*bsize_elm + 2*bsize_frm)
6459  block_type  (nelw+2)= MPI_UB
6460 
6461  call xmpio_type_struct(nelw+2,block_length,block_displ,block_type,slk_type,mpi_err)
6462  ABI_CHECK_MPI(mpi_err, "MPI_type_STRUCT")
6463 
6464  ABI_FREE(block_length)
6465  ABI_FREE(block_displ)
6466  ABI_FREE(block_type)
6467 
6468  call MPI_type_COMMIT(slk_type,mpi_err)
6469  ABI_CHECK_MPI(mpi_err, "MPI_type_COMMIT")
6470 
6471 #else
6472  ABI_ERROR("MPI-IO is mandatory in slk_single_fview_write")
6473 #endif
6474 
6475 end subroutine slk_single_fview_write

m_slk/slk_symmetrize [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_symmetrize

FUNCTION

  Symmetrize a square scaLAPACK matrix.

INPUTS

  uplo=String specifying whether only the upper or lower triangular part of the global matrix has been read
    = "U":  Upper triangular has been read.
    = "L":  Lower triangular has been read.
    = "A":  Full matrix (used for general complex matrices)
  symtype=Symmetry type of the matrix (used only if uplo = "U" or "L").
    = "H" for Hermitian matrix.
    = "S" for symmetric matrix.
    = "N" if the matrix has no symmetry (not compatible with uplo="U" or uplo="L").

SIDE EFFECTS

  Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer
    supposed to be allocated.
    %buffer_cplx=Local buffer containing the distributed matrix stored on the external file.
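
  A typical call after reading only the upper triangle of a Hermitian matrix (minimal usage sketch):

    call slk_symmetrize(Slk_mat, "U", "H")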

SOURCE

5984 subroutine slk_symmetrize(Slk_mat, uplo, symtype)
5985 
5986 !Arguments ------------------------------------
5987 !scalars
5988  class(matrix_scalapack),intent(inout) :: Slk_mat
5989  character(len=*),intent(in) :: symtype
5990  character(len=*),intent(in) :: uplo
5991 
5992 !Local variables ------------------------------
5993 !scalars
5994  integer :: jloc,iloc,iglob,jglob,ij_loc
5995  logical :: is_hermitian,is_real,is_cplx,is_symmetric
5996  character(len=500) :: msg
5997 
5998 !************************************************************************
5999 
6000  is_cplx = (allocated(Slk_mat%buffer_cplx))
6001  is_real = (allocated(Slk_mat%buffer_real))
6002 
6003  ! One and only one buffer should be allocated.
6004  if (is_real .and. is_cplx) then
6005    write(msg,'(a,2l1)')" ScaLAPACK buffers are not allocated correctly, is_real=, is_cplx ",is_real,is_cplx
6006    ABI_ERROR(msg)
6007  end if
6008 
6009  if (is_real) RETURN
6010 
6011  is_hermitian=.FALSE.; is_symmetric=.FALSE.
6012  select case (symtype(1:1))
6013  case ("H", "h")
6014    is_hermitian = .TRUE.
6015  case ("S","s")
6016    is_symmetric = .TRUE.
6017  case("N","n")
6018    if (ALL(uplo(1:1) /= ["A","a"])) then
6019      msg = " Found symtype= "//TRIM(symtype)//", but uplo= "//TRIM(uplo)
6020      ABI_ERROR(msg)
6021    end if
6022    RETURN  ! Nothing to do.
6023  case default
6024    ABI_ERROR("Wrong symtype "//TRIM(symtype))
6025  end select
6026 
6027  !write(std_out,*)"is_cplx",is_cplx
6028  !write(std_out,*)"is_hermitian",is_hermitian
6029 
6030  select case (uplo(1:1))
6031  case ("A","a")
6032    ! Full global matrix has been read, nothing to do.
6033    return
6034 
6035  case ("U", "u")
6036    ! Only the upper triangle of the global matrix was read.
6037    if (is_cplx .and. is_hermitian) then
6038      ij_loc=0
6039      do jloc=1,Slk_mat%sizeb_local(2)
6040        do iloc=1,Slk_mat%sizeb_local(1)
6041          call slk_mat%loc2glob(iloc, jloc, iglob, jglob)
6042          ij_loc = ij_loc+1
6043          if (jglob < iglob) then
6044            ! Diagonal elements are not forced to be real.
6045            Slk_mat%buffer_cplx(iloc,jloc) = DCONJG(Slk_mat%buffer_cplx(iloc,jloc))
6046          end if
6047          !if (iglob==jglob) Slk_mat%buffer_cplx(iloc,jloc) =  real(Slk_mat%buffer_cplx(iloc,jloc))
6048        end do
6049      end do
6050    end if
6051 
6052  case ("L", "l")
6053    ! Only the lower triangle of the global matrix was read.
6054    if (is_cplx .and. is_hermitian) then
6055      ij_loc=0
6056      do jloc=1,Slk_mat%sizeb_local(2)
6057        do iloc=1,Slk_mat%sizeb_local(1)
6058          call slk_mat%loc2glob(iloc, jloc, iglob, jglob)
6059          ij_loc = ij_loc+1
6060          if (jglob>iglob) then ! diagonal elements are not forced to be real.
6061            Slk_mat%buffer_cplx(iloc,jloc) =  DCONJG(Slk_mat%buffer_cplx(iloc,jloc))
6062          end if
6063          !if (iglob==jglob) Slk_mat%buffer_cplx(iloc,jloc) =  real(Slk_mat%buffer_cplx(iloc,jloc))
6064        end do
6065      end do
6066    end if
6067 
6068  case default
6069    ABI_BUG(" Wrong uplo: "//TRIM(uplo))
6070  end select
6071 
6072 end subroutine slk_symmetrize

m_slk/slk_take_from [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_take_from

FUNCTION

  Take values from source
  NB: This routine should be called by all procs owning mat and source.

INPUTS

  [free]: True if source should be deallocated. Default: False
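
  A minimal usage sketch, assuming the take_from type-bound binding and a hypothetical
  processor descriptor proc: redistribute source with new block sizes, then free it:

    call out_mat%init(source%sizeb_global(1), source%sizeb_global(2), proc, source%istwf_k, size_blocs=[64, 64])
    call out_mat%take_from(source, free=.true.)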

OUTPUT

SOURCE

4961 subroutine slk_take_from(out_mat, source, &
4962                          ija, ijb, free) ! optional
4963 
4964 !Arguments ------------------------------------
4965  class(matrix_scalapack),intent(inout) :: out_mat
4966  class(matrix_scalapack),intent(inout) :: source
4967  integer,optional,intent(in) :: ija(2), ijb(2)
4968  logical,optional,intent(in) :: free
4969 
4970 !Local variables-------------------------------
4971  integer :: mm, nn
4972  character(len=500) :: msg
4973  integer :: ija__(2), ijb__(2)
4974 
4975 ! *************************************************************************
4976 
4977  ! prototype
4978  !call pzgemr2d(m, n, a, ia, ja, desca, b, ib, jb, descb, ictxt)
4979 
4980  ! Take care when context A is disjoint from context B. The general rules for which parameters need to be set are:
4981  !
4982  !   - All calling processes must have the correct m and n.
4983  !   - Processes in context A must correctly define all parameters describing A.
4984  !   - Processes in context B must correctly define all parameters describing B.
4985  !   - Processes which are not members of context A must pass ctxt_a = -1 and need not set other parameters describing A.
4986  !   - Processes which are not members of context B must pass ctxt_b = -1 and need not set other parameters describing B.
4987 
4988  mm = source%sizeb_global(1)
4989  nn = source%sizeb_global(2)
4990 
4991  ija__ = [1, 1]; if (present(ija)) ija__ = ija
4992  ijb__ = [1, 1]; if (present(ijb)) ijb__ = ijb
4993 
4994  if (all(out_mat%sizeb_global == -1)) then
4995    out_mat%descript%tab(CTXT_) = -1
4996  else
4997    ABI_CHECK_IEQ(out_mat%istwf_k, source%istwf_k, "istwfk_mat /= istwfk_source")
4998    if (any(out_mat%sizeb_global /= source%sizeb_global)) then
4999      msg = sjoin("Matrices should have same global shape but out_mat:", ltoa(out_mat%sizeb_global), &
5000                  "source:", ltoa(source%sizeb_global))
5001      ABI_ERROR(msg)
5002    end if
5003  end if
5004 
5005  if (allocated(source%buffer_cplx)) then
5006 #ifdef HAVE_LINALG_SCALAPACK
5007    call pzgemr2d(mm, nn,  &
5008                  source%buffer_cplx, ija__(1), ija__(2), source%descript%tab,   &
5009                  out_mat%buffer_cplx, ijb__(1), ijb__(2), out_mat%descript%tab, &
5010                  source%processor%grid%ictxt)
5011 
5012  else if (allocated(source%buffer_real)) then
5013    call pdgemr2d(mm, nn,  &
5014                  source%buffer_real, ija__(1), ija__(2), source%descript%tab,   &
5015                  out_mat%buffer_real,ijb__(1), ijb__(2), out_mat%descript%tab, &
5016                  source%processor%grid%ictxt)
5017 #endif
5018  else
5019    ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!")
5020  end if
5021 
5022  if (present(free)) then
5023    if (free) call source%free()
5024  end if
5025 
5026 end subroutine slk_take_from

m_slk/slk_write [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slk_write

FUNCTION

  Routine to write a square scaLAPACK-distributed matrix to an external file using MPI-IO.

INPUTS

  Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer
    containing the distributed matrix.
  uplo=String specifying whether only the upper or lower triangular part of the global matrix is used:
    = "U":  Upper triangular
    = "L":  Lower triangular
    = "A":  Full matrix (used for general complex matrices)
  is_fortran_file=.FALSE. if a C stream is used; .TRUE. for writing Fortran binary files.
  [fname]= Mutually exclusive with mpi_fh. The name of the external file on which the matrix will be written.
           The file is open and closed inside the routine with MPI flags specified by flags.
  [mpi_fh]=File handler associated to the file (already open in the caller). Not compatible with fname.
  [flags]=MPI-IO flags used to open the file in MPI_FILE_OPEN.
    Default is MPI_MODE_CREATE + MPI_MODE_WRONLY + MPI_MODE_APPEND.
  [glob_subarray(2,2)] = Used to select the subarray of the global matrix. Used only when uplo="All"
     NOTE that each node should call the routine with the same value.
     glob_subarray(:,1)=starting global coordinates of the subarray in each dimension
       (array of nonnegative integers >=1, <=array_of_sizes)
     glob_subarray(:,2)=Number of elements in each dimension of the subarray (array of positive integers)

OUTPUT

  Only writing. The global scaLAPACK matrix is written to file fname.
  If fname is present then the file is open and closed inside the routine. Any exception is fatal.

SIDE EFFECTS

  [offset]=
    input:  Offset used to access the content of the file. Default is zero.
    output: New offset incremented with the byte size of the matrix that has been written (Fortran
            markers are included if is_fortran_file=.TRUE.)

TODO

  - Generalize the implementation by adding support for writing the real buffer.
  - This routine should be removed and replaced by hdf5 + mpi-io
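
  A minimal usage sketch (file name hypothetical; my_offset must be of kind XMPI_OFFSET_KIND):

    my_offset = 0
    call slk_write(Slk_mat, "A", .true., fname="SLK_MAT", offset=my_offset)
    ! On exit, my_offset points past the matrix (Fortran markers included).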

SOURCE

5434 subroutine slk_write(Slk_mat, uplo, is_fortran_file, fname,mpi_fh, offset, flags, glob_subarray)
5435 
5436 !Arguments ------------------------------------
5437 !scalars
5438  integer,optional,intent(in) :: flags
5439  integer,optional,intent(inout) :: mpi_fh
5440  integer(XMPI_OFFSET_KIND),optional,intent(inout) :: offset
5441  logical,intent(in) :: is_fortran_file
5442  character(len=*),optional,intent(in) :: fname
5443  character(len=*),intent(in) :: uplo
5444  class(matrix_scalapack),intent(in) :: Slk_mat
5445 !array
5446  integer,optional,intent(in) :: glob_subarray(2,2)
5447 
5448 !Local variables ------------------------------
5449 !scalars
5450 #if defined HAVE_LINALG_SCALAPACK && defined HAVE_MPI_IO
5451  integer :: jloc,iloc,nrows_glob,ncols_glob,elw,nrows_w,ncols_w ! iglob,jglob,
5452  integer :: slk_type,offset_err,etype,nfrec,bsize_elm,mpi_type_elm
5453  integer(XMPI_OFFSET_KIND) :: my_offset
5454  logical :: do_open
5455  integer :: comm,my_flags,my_fh,buffer_size
5456  integer :: ierr,nelw,col_glob ! ij_loc,
5457 !arrays
5458  integer(XMPI_OFFSET_KIND),allocatable :: bsize_frecord(:)
5459  integer,pointer :: elw2slk(:,:)
5460  complex(dpc),allocatable :: buffer1_cplx(:)
5461  character(len=500) :: msg
5462 
5463 !************************************************************************
5464 
5465  ABI_CHECK(allocated(Slk_mat%buffer_cplx), "buffer_cplx not allocated")
5466 
5467  if (firstchar(uplo, ["U","L"]) .and. Slk_mat%sizeb_global(1) /= Slk_mat%sizeb_global(2) ) then
5468    ABI_ERROR("rectangular matrices are not compatible with the specified uplo")
5469  end if
5470 
5471  if (PRESENT(glob_subarray).and. .not. firstchar(uplo, ["A"])) then
5472    ABI_ERROR("glob_subarray should not be used when uplo/=All")
5473  end if
5474 
5475  do_open = PRESENT(fname)
5476  if (do_open) then
5477    ABI_CHECK(.not.PRESENT(mpi_fh),"mpi_fh should not be present when fname is used")
5478  else
5479    ABI_CHECK(PRESENT(mpi_fh),"mpi_fh should be present")
5480  end if
5481 
5482  my_offset=0; if (PRESENT(offset)) my_offset=offset
5483 
5484  comm = Slk_mat%processor%comm
5485 
5486  nrows_glob=Slk_mat%sizeb_global(1)
5487  ncols_glob=Slk_mat%sizeb_global(2)
5488  buffer_size= PRODUCT(Slk_mat%sizeb_local(1:2))
5489 
5490  call slk_mat%bsize_and_type(bsize_elm, mpi_type_elm)
5491 
5492  if (do_open) then !Open the file.
5493    my_flags=MPI_MODE_CREATE + MPI_MODE_WRONLY + MPI_MODE_APPEND
5494    if (PRESENT(flags)) my_flags = flags
5495 
5496    call MPI_FILE_OPEN(comm, fname, my_flags, MPI_INFO_NULL, my_fh, ierr)
5497    ABI_CHECK_MPI(ierr, "MPI_FILE_OPEN "//TRIM(fname))
5498  else
5499    my_fh = mpi_fh
5500  end if
5501 
5502  if (PRESENT(glob_subarray)) then
5503    call slk_single_fview_write(Slk_mat,uplo,nelw,elw2slk,etype,slk_type,offset_err,&
5504      is_fortran_file=is_fortran_file,glob_subarray=glob_subarray)
5505  else
5506    call slk_single_fview_write(Slk_mat,uplo,nelw,elw2slk,etype,slk_type,offset_err,&
5507      is_fortran_file=is_fortran_file)
5508  end if
5509 
5510  if (offset_err /= 0) then
5511    write(msg,"(3a)")&
5512     " Global position index cannot be stored in standard Fortran integer ",ch10,&
5513     " scaLAPACK matrix cannot be written with a single MPI-IO call."
5514    ABI_ERROR(msg)
5515  end if
5516 
5517  call MPI_FILE_SET_VIEW(my_fh, my_offset, etype, slk_type, 'native', MPI_INFO_NULL, ierr)
5518  ABI_CHECK_MPI(ierr,"SET_VIEW")
5519 
5520  call MPI_TYPE_FREE(slk_type,ierr)
5521  ABI_CHECK_MPI(ierr,"MPI_type_FREE")
5522 
5523  if (nelw == buffer_size) then
5524    ! Dump Slk_mat% immediately.
5525    call MPI_FILE_WRITE_ALL(my_fh, Slk_mat%buffer_cplx, buffer_size, MPI_DOUBLE_complex, MPI_STATUS_IGNORE, ierr)
5526    ABI_CHECK_MPI(ierr,"WRITE_ALL")
5527  else
5528    ! Have to extract the data to be written.
5529    ABI_MALLOC(buffer1_cplx,(nelw))
5530    do elw=1,nelw
5531      iloc = elw2slk(1,elw)
5532      jloc = elw2slk(2,elw)
5533      buffer1_cplx(elw) = Slk_mat%buffer_cplx(iloc,jloc)
5534    end do
5535    call MPI_FILE_WRITE_ALL(my_fh, buffer1_cplx, nelw, MPI_DOUBLE_complex, MPI_STATUS_IGNORE, ierr)
5536    ABI_CHECK_MPI(ierr,"WRITE_ALL")
5537    ABI_FREE(buffer1_cplx)
5538  end if
5539 
5540  ABI_FREE(elw2slk)
5541  !
5542  ! Number of columns and rows that have been written.
5543  ! Used to write the Fortran markers and to increment the offset.
5544  nrows_w = nrows_glob
5545  ncols_w = ncols_glob
5546  if (PRESENT(glob_subarray)) then
5547    nrows_w = glob_subarray(1,2) - glob_subarray(1,1) + 1
5548    ncols_w = glob_subarray(2,2) - glob_subarray(2,1) + 1
5549    if (.not.firstchar(uplo, ["A"])) then
5550      ABI_ERROR("glob_subarray should not be used when uplo/=All")
5551    end if
5552  end if
5553 
5554  !TODO check whether slk_single_fview_write can report an offset to reduce the extent.
5555  if (is_fortran_file) then ! Collective writing of the Fortran markers.
5556    nfrec = ncols_w
5557    ABI_MALLOC(bsize_frecord,(nfrec))
5558    if (firstchar(uplo, ["A"])) then
5559      bsize_frecord = nrows_w * bsize_elm
5560    else if (firstchar(uplo, ["U"])) then
5561      bsize_frecord = (/(col_glob * bsize_elm, col_glob=1,nfrec)/)
5562    else if (firstchar(uplo, ["L"])) then
5563      bsize_frecord = (/(col_glob * bsize_elm, col_glob=nfrec,1,-1)/)
5564    else
5565      ABI_ERROR("Wrong uplo")
5566    end if
5567    call xmpio_write_frmarkers(my_fh,my_offset,xmpio_collective,nfrec,bsize_frecord,ierr)
5568    ABI_CHECK(ierr==0,"Error while writing Fortran markers")
5569    ABI_FREE(bsize_frecord)
5570  end if
5571 
5572  if (do_open) then
5573    ! Close the file.
5574    call MPI_FILE_CLOSE(my_fh, ierr)
5575    ABI_CHECK_MPI(ierr,"FILE_CLOSE")
5576  end if
5577 
5578  ! Increment the offset
5579  if (PRESENT(offset)) then
5580    if (firstchar(uplo, ["A"])) then
5581      offset = offset + nrows_w*ncols_w*bsize_elm
5582      if (is_fortran_file) offset = offset + ncols_w*2*xmpio_bsize_frm
5583    else if (firstchar(uplo, ["U","L"])) then
5584      offset = offset + ( (Slk_mat%sizeb_global(2) * (Slk_mat%sizeb_global(2)+1)) / 2 ) * bsize_elm
5585      if (is_fortran_file) offset = offset + Slk_mat%sizeb_global(2)*2*xmpio_bsize_frm
5586    else
5587      ABI_ERROR("Wrong uplo")
5588    end if
5589  end if
5590 
5591  call xmpi_barrier(comm)
5592  RETURN
5593 
5594 #else
5595   ABI_ERROR("MPI-IO support not activated")
5596 #endif
5597 
5598 end subroutine slk_write

m_slk/slkmat_check_local_shape [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slkmat_check_local_shape

FUNCTION

  Debugging tool to test the local shape `lshape` of the local buffer.
  Return exit status in `ok` and error message in `msg`.
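
  A minimal usage sketch (local dimensions hypothetical; msg declared by the caller):

    if (.not. mat%check_local_shape([nloc_rows, nloc_cols], msg)) then
      ABI_ERROR(msg)
    end if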

SOURCE

809 logical function slkmat_check_local_shape(mat, lshape, msg) result (ok)
810 
811 !Arguments ------------------------------------
812  class(basemat_t),intent(in) :: mat
813  integer,intent(in) :: lshape(2)
814  character(len=*),intent(out) :: msg
815 
816 ! *********************************************************************
817 
818  msg = ""
819  ok = all(mat%sizeb_local == lshape)
820  if (.not. ok) then
821    msg = sjoin("mat%sizeb_local:", ltoa(mat%sizeb_local), " not equal to input local lshape ", ltoa(lshape))
822    return
823  end if
824 
825  select type (mat)
826  class is (matrix_scalapack)
827    if (allocated(mat%buffer_cplx)) then
828      ok = all(shape(mat%buffer_cplx) == lshape)
829      if (.not. ok) then
830        msg = sjoin("shape(buffer_cplx):", ltoa(shape(mat%buffer_cplx)), " != input local lshape ", ltoa(lshape)); return
831      end if
832    else if (allocated(mat%buffer_real)) then
833      ok = all(shape(mat%buffer_real) == lshape)
834      if (.not. ok) then
835        msg = sjoin("shape(buffer_real):", ltoa(shape(mat%buffer_real)), " != input local lshape ", ltoa(lshape)); return
836      end if
837    end if
838 
839  class is (slkmat_sp_t)
840    ! Same piece of code as above. May use include file!
841    if (allocated(mat%buffer_cplx)) then
842      ok = all(shape(mat%buffer_cplx) == lshape)
843      if (.not. ok) then
844        msg = sjoin("shape(buffer_cplx):", ltoa(shape(mat%buffer_cplx)), " != input local lshape ", ltoa(lshape)); return
845      end if
846    else if (allocated(mat%buffer_real)) then
847      ok = all(shape(mat%buffer_real) == lshape)
848      if (.not. ok) then
849        msg = sjoin("shape(buffer_real):", ltoa(shape(mat%buffer_real)), " != input local lshape ", ltoa(lshape)); return
850      end if
851    end if
852 
853  class default
854    ABI_ERROR("Wrong class")
855  end select
856 
857 end function slkmat_check_local_shape

m_slk/slkmat_print [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slkmat_print

FUNCTION

  Print info on scalapack matrix.

INPUTS

  [unit]=Unit number (default: std_out)
  [header]=title for info
  [prtvol]=Verbosity level (default: 0)
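
  A minimal usage sketch (header string hypothetical):

    call mat%print(header="Polarizability matrix", unit=std_out, prtvol=0)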

SOURCE

747 subroutine slkmat_print(mat, header, unit, prtvol)
748 
749 !Arguments ------------------------------------
750  class(basemat_t),intent(in) :: mat
751  character(len=*),optional,intent(in) :: header
752  integer,optional,intent(in) :: prtvol, unit
753 
754 !Local variables-------------------------------
755  integer :: unt, my_prtvol, grid_dims(2)
756  character(len=50) :: matrix_dtype
757  character(len=5000) :: msg
758 
759 ! *********************************************************************
760 
761  unt = std_out; if (present(unit)) unt =unit
762  my_prtvol = 0; if (present(prtvol)) my_prtvol = prtvol
763 
764  msg = ' ==== Info on matrix_scalapack ==== '
765  if (present(header)) msg=' ==== '//trim(adjustl(header))//' ==== '
766  call wrtout(unt, msg)
767 
768  matrix_dtype = "undefined"
769  select type (mat)
770  class is (matrix_scalapack)
771    if (allocated(mat%buffer_real)) matrix_dtype = "real"
772    if (allocated(mat%buffer_cplx)) matrix_dtype = "complex"
773  class is (slkmat_sp_t)
774    if (allocated(mat%buffer_real)) matrix_dtype = "real"
775    if (allocated(mat%buffer_cplx)) matrix_dtype = "complex"
776  class default
777    ABI_ERROR("Wrong class")
778  end select
779 
780  grid_dims = [-1, -1]
781  if (associated(mat%processor)) grid_dims = mat%processor%grid%dims
782 
783  write(msg,'(5(3a),a,f8.1,a)') &
784    "  matrix_dtype ..... ", trim(matrix_dtype), ch10, &
785    "  sizeb_global ..... ", trim(ltoa(mat%sizeb_global)), ch10, &
786    "  sizeb_local ...... ", trim(ltoa(mat%sizeb_local)), ch10, &
787    "  sizeb_blocs ...... ", trim(ltoa(mat%sizeb_blocs)), ch10, &
788    "  processor grid  .. ", trim(ltoa(grid_dims)), ch10, &
789    "  memory (Mb) ...... ", mat%locmem_mb(), ch10
790  call wrtout(unt, msg)
791 
792  !if (prtvol > 10) call mat%write(unit)
793 
794 end subroutine slkmat_print

m_slk/slkmat_sp_collect_cplx [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slkmat_sp_collect_cplx

FUNCTION

  Return on all processors the complex submatrix of shape (mm, nn) starting at position ija.
  NB: `out_carr` is allocated by the routine.
  If the optional argument request is used, the routine performs a non-blocking BCAST and the
  client code is supposed to wait before accessing out_carr.
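
  A minimal sketch of the non-blocking variant, assuming the xmpi_wait helper from m_xmpi:

    call in_mat%collect_cplx(mm, nn, [1, 1], out_carr, request=req)
    ! ... overlap independent work here ...
    call xmpi_wait(req, ierr)   ! out_carr is valid only after the wait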

SOURCE

5201 subroutine slkmat_sp_collect_cplx(in_mat, mm, nn, ija, out_carr, request)
5202 
5203 !Arguments ------------------------------------
5204  class(slkmat_sp_t),intent(in) :: in_mat
5205  integer,intent(in) :: mm, nn, ija(2)
5206  complex(sp) ABI_ASYNC, allocatable,intent(out) :: out_carr(:,:)
5207  integer ABI_ASYNC, optional,intent(out) :: request
5208 
5209 !Local variables-------------------------------
5210  integer,parameter :: master = 0
5211  integer :: ierr
5212  type(processor_scalapack) :: self_processor
5213  type(slkmat_sp_t) :: out_mat
5214 
5215 ! *************************************************************************
5216 
5217  ABI_CHECK(allocated(in_mat%buffer_cplx), "buffer_cplx is not allocated")
5218 
5219  if (in_mat%processor%grid%nbprocs == 1) then
5220    ! Copy buffer and return
5221    ABI_MALLOC(out_carr, (mm, nn))
5222    out_carr(:,:) = in_mat%buffer_cplx(ija(1):ija(1)+mm-1, ija(2):ija(2)+nn-1); return
5223  end if
5224 
5225  ! Two-step algorithm:
5226  !     1) Use pzgemr2d to collect submatrix on master.
5227  !     2) Master broadcasts the submatrix.
5228 
5229  if (in_mat%processor%myproc == master) then
5230    call self_processor%init(xmpi_comm_self)
5231    call out_mat%init(mm, nn, self_processor, in_mat%istwf_k, size_blocs=[mm, nn])
5232  else
5233    out_mat%descript%tab(CTXT_) = -1
5234  end if
5235 
5236 #ifdef HAVE_LINALG_SCALAPACK
5237  call pcgemr2d(mm, nn,  &
5238                in_mat%buffer_cplx, ija(1), ija(2), in_mat%descript%tab,   &
5239                out_mat%buffer_cplx, 1, 1, out_mat%descript%tab, &
5240                in_mat%processor%grid%ictxt)
5241 #endif
5242 
5243  if (in_mat%processor%myproc == master) then
5244    ABI_MOVE_ALLOC(out_mat%buffer_cplx, out_carr)
5245    call out_mat%free()
5246    call self_processor%free()
5247  else
5248    ABI_MALLOC(out_carr, (mm, nn))
5249  end if
5250 
5251  if (present(request)) then
5252    call xmpi_ibcast(out_carr, master, in_mat%processor%comm, request, ierr)
5253  else
5254    call xmpi_bcast(out_carr, master, in_mat%processor%comm, ierr)
5255  end if
5256 
5257 end subroutine slkmat_sp_collect_cplx

m_slk/slkmat_sp_copy [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slkmat_sp_copy

FUNCTION

  Copy in_mat to out_mat. If empty is True, the values in the local buffer are not copied. Default: False
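
  A minimal usage sketch:

    call in_mat%copy(out_mat, empty=.true.)  ! same distribution, local buffer allocated but not filled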

SOURCE

1094 subroutine slkmat_sp_copy(in_mat, out_mat, empty)
1095 
1096 !Arguments ------------------------------------
1097  class(slkmat_sp_t),intent(in) :: in_mat
1098  class(slkmat_sp_t),intent(out) :: out_mat
1099  logical,optional,intent(in) :: empty
1100 
1101 !Local variables-------------------------------
1102  logical :: empty__
1103 
1104 ! *********************************************************************
1105 
1106  call out_mat%init(in_mat%sizeb_global(1), in_mat%sizeb_global(2), in_mat%processor, in_mat%istwf_k, &
1107                    size_blocs=in_mat%sizeb_blocs)
1108 
1109  empty__ = .False.; if (present(empty)) empty__ = empty
1110  if (.not. empty__) then
1111    if (in_mat%istwf_k == 1) then
1112      out_mat%buffer_cplx = in_mat%buffer_cplx
1113    else
1114      out_mat%buffer_real = in_mat%buffer_real
1115    end if
1116  end if
1117 
1118 end subroutine slkmat_sp_copy

m_slk/slkmat_sp_heev [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

 slkmat_sp_heev

FUNCTION

  slkmat_sp_heev computes selected eigenvalues and, optionally, eigenvectors of an Hermitian matrix A.
   A * X = lambda * X

INPUTS

  JOBZ    (global input) CHARACTER*1
          Specifies whether or not to compute the eigenvectors:
          = "N":  Compute eigenvalues only.
          = "V":  Compute eigenvalues and eigenvectors.
  UPLO    (global input) CHARACTER*1
          Specifies whether the upper or lower triangular part of the symmetric matrix A is stored:
          = "U":  Upper triangular
          = "L":  Lower triangular

  mat=The object storing the local buffer in SINGLE PRECISION, the array descriptor, the PBLAS context.
  vec=The distributed eigenvectors. Not referenced if JOBZ="N"

OUTPUT

  W       (global output) array, dimension (N) where N is the rank of the global matrix.
          On normal exit, the first M entries contain the selected eigenvalues in ascending order.

SIDE EFFECTS

  If JOBZ="V", the local buffer vec%buffer_cplx will contain part of the distributed eigenvectors.
  On exit, the lower triangle (if UPLO='L') or the upper triangle (if UPLO='U') of A, including the diagonal, is destroyed.
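
  A minimal usage sketch (names hypothetical; eigen must be a real(sp) array of global size nn):

    call eigvec%init(nn, nn, mat%processor, mat%istwf_k, size_blocs=mat%sizeb_blocs)
    ABI_MALLOC(eigen, (nn))
    call mat%heev("V", "U", eigvec, eigen)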

SOURCE

3627 subroutine slkmat_sp_heev(mat, jobz, uplo, vec, w, &
3628                           mat_size, ija, ijz) ! Optional
3629 
3630 !Arguments ------------------------------------
3631 !scalars
3632  class(slkmat_sp_t),intent(inout) :: mat
3633  character(len=*),intent(in) :: jobz, uplo
3634  class(slkmat_sp_t),intent(inout) :: vec
3635 !arrays
3636  real(sp),intent(out) :: w(:)
3637  integer,optional,intent(in) :: mat_size, ija(2), ijz(2)
3638 
3639 #ifdef HAVE_LINALG_SCALAPACK
3640 !Local variables ------------------------------
3641 !scalars
3642  integer :: lwork, lrwork, info, nn
3643 !arrays
3644  integer :: ija__(2), ijz__(2)
3645  real(sp),allocatable :: rwork_sp(:)
3646  complex(sp),allocatable :: work_sp(:)
3647 
3648 !************************************************************************
3649 
3650  ABI_CHECK(allocated(mat%buffer_cplx), "buffer_cplx not allocated")
3651 
3652  nn = mat%sizeb_global(2); if (present(mat_size)) nn = mat_size
3653  ija__ = [1, 1]; if (present(ija)) ija__ = ija
3654  ijz__ = [1, 1]; if (present(ijz)) ijz__ = ijz
3655 
3656  ! Get optimal size of workspace.
3657  lwork = - 1; lrwork = -1
3658  ABI_MALLOC(work_sp, (1))
3659  ABI_MALLOC(rwork_sp, (1))
3660 
3661  !call pzheev(jobz, uplo, n, a, ia, ja, desca, w, z, iz, jz, descz, work, lwork, rwork, lrwork, info)
3662 
3663  call PCHEEV(jobz, uplo, nn, mat%buffer_cplx, ija__(1), ija__(2), mat%descript%tab, &
3664              w, vec%buffer_cplx, ijz__(1), ijz__(2), vec%descript%tab, work_sp, lwork, rwork_sp, lrwork, info)
3665  ABI_CHECK(info == 0, sjoin("Error in the calculation of the workspace size, info:", itoa(info)))
3666 
3667  lwork = NINT(real(work_sp(1))); lrwork= NINT(rwork_sp(1)) !*2
3668  ABI_FREE(work_sp)
3669  ABI_FREE(rwork_sp)
3670 
3671  ! MG: Nov 23 2011. On my mac with the official scalapack package, rwork(1) is not large enough and causes a SIGFAULT.
3672  if (firstchar(jobz, ['V'])) then
3673    if (lrwork < 2*nn + 2*nn-2) lrwork = 2*nn + 2*nn-2
3674  else if (firstchar(jobz, ['N'])) then
3675    if (lrwork < 2*nn) lrwork = 2*nn
3676  end if
3677  !write(std_out,*)lwork,lrwork
3678 
3679  ! Solve the problem.
3680  ABI_MALLOC(work_sp, (lwork))
3681  ABI_MALLOC(rwork_sp, (lrwork))
3682 
3683  call PCHEEV(jobz, uplo, nn, mat%buffer_cplx, ija__(1), ija__(2), mat%descript%tab, &
3684              w, vec%buffer_cplx, ijz__(1), ijz__(2), vec%descript%tab, work_sp, lwork, rwork_sp, lrwork, info)
3685  ABI_CHECK(info == 0, sjoin("PCHEEV returned info:", itoa(info)))
3686  ABI_FREE(work_sp)
3687  ABI_FREE(rwork_sp)
3688 #endif
3689 
3690 end subroutine slkmat_sp_heev

m_slk/slkmat_sp_hpd_invert [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

 slkmat_sp_hpd_invert

FUNCTION

  Compute the inverse of an Hermitian positive definite matrix.

INPUTS

  uplo: global input
    = 'U':  Upper triangle of sub( A ) is stored;
    = 'L':  Lower triangle of sub( A ) is stored.
  [full]: If the full PBLAS matrix is needed. Default: True

SIDE EFFECTS

  mat= The object storing the local buffer, the array descriptor, the context, etc.
    On entry, this array contains the local pieces of the N-by-N Hermitian distributed matrix sub( A ) to be factored.
    If UPLO = 'U', the leading N-by-N upper triangular part of sub( A ) contains the upper triangular part of the matrix,
    and its strictly lower triangular part is not referenced.
    If UPLO = 'L', the leading N-by-N lower triangular part of sub( A ) contains the lower triangular part of the
    distributed matrix, and its strictly upper triangular part is not referenced.
    On exit, the local pieces of the upper or lower triangle of the (Hermitian) inverse of sub( A ).
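
  A minimal usage sketch: factorize, invert, and fill both triangles of the local buffer:

    call mat%hpd_invert("U", full=.true.)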

SOURCE

4481 subroutine slkmat_sp_hpd_invert(mat, uplo, full)
4482 
4483 !Arguments ------------------------------------
4484  character(len=*),intent(in) :: uplo
4485  class(slkmat_sp_t),intent(inout) :: mat
4486  logical,optional,intent(in) :: full
4487 
4488 #ifdef HAVE_LINALG_SCALAPACK
4489 !Local variables ------------------------------
4490 !scalars
4491  integer :: info, mm, il1, il2, iglob1, iglob2
4492  type(slkmat_sp_t) :: work_mat
4493  logical :: full__
4494 
4495 !************************************************************************
4496 
4497  ABI_CHECK(allocated(mat%buffer_cplx), "buffer_cplx not allocated")
4498 
4499  ! ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite.
4500  !  A = U**H * U,   if UPLO = 'U', or
4501  !  A = L  * L**H,  if UPLO = 'L',
4502  mm = mat%sizeb_global(1)
4503  call PCPOTRF(uplo, mm, mat%buffer_cplx, 1, 1, mat%descript%tab, info)
4504  ABI_CHECK(info == 0, sjoin("PCPOTRF returned info:", itoa(info)))
4505 
4506  ! PZPOTRI computes the inverse of a complex Hermitian positive definite
4507  ! distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the
4508  ! Cholesky factorization sub( A ) = U**H*U or L*L**H computed by PZPOTRF.
4509  call PCPOTRI(uplo, mm, mat%buffer_cplx, 1, 1, mat%descript%tab, info)
4510  ABI_CHECK(info == 0, sjoin("PCPOTRI returned info:", itoa(info)))
4511 
4512  full__ = .True.; if (present(full)) full__ = full
4513  if (full__) then
4514    ! Only the uplo part contains the inverse so we need to fill the other triangular part.
4515    !     1)  Fill the missing triangle with zeros and copy results to work_mat
4516    !     2)  Call pzgeadd to compute: sub(C) := beta*sub(C) + alpha*op(sub(A))
4517    !     3)  Divide diagonal elements by two.
4518 
4519    do il2=1,mat%sizeb_local(2)
4520      iglob2 = mat%loc2gcol(il2)
4521      do il1=1,mat%sizeb_local(1)
4522        iglob1 = mat%loc2grow(il1)
4523        if (uplo == "L" .and. iglob2 > iglob1) mat%buffer_cplx(il1, il2) = zero_sp
4524        if (uplo == "U" .and. iglob2 < iglob1) mat%buffer_cplx(il1, il2) = zero_sp
4525      end do
4526    end do
4527 
4528    call mat%copy(work_mat, empty=.False.)
4529 
4530    ! call pzgeadd(trans, m, n, alpha, a, ia, ja, desca, beta, c, ic, jc, descc)
4531    ! sub(C) := beta*sub(C) + alpha*op(sub(A))
4532    call pcgeadd("C", mm, mm, cone_sp, work_mat%buffer_cplx, 1, 1, work_mat%descript%tab, &
4533          cone_sp, mat%buffer_cplx, 1, 1, mat%descript%tab)
4534    call work_mat%free()
4535 
4536    do il2=1,mat%sizeb_local(2)
4537      iglob2 = mat%loc2gcol(il2)
4538      do il1=1,mat%sizeb_local(1)
4539        iglob1 = mat%loc2grow(il1)
4540        if (iglob2 == iglob1) mat%buffer_cplx(il1, il2) = 0.5_sp * mat%buffer_cplx(il1, il2)
4541      end do
4542    end do
4543  end if ! full__
4544 #endif
4545 
4546 end subroutine slkmat_sp_hpd_invert

m_slk/slkmat_sp_ptrans [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slkmat_sp_ptrans

FUNCTION

 Transposes a matrix

    sub( C ) := beta*sub( C ) + alpha*op( sub( A ) )

 where

    sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1),
    sub( A ) denotes A(IA:IA+N-1,JA:JA+M-1), and op( X ) = X'.

 Thus, op( sub( A ) ) denotes A(IA:IA+N-1,JA:JA+M-1)'.
 Beta is a scalar, sub( C ) is an m by n submatrix, and sub( A ) is an n by m submatrix.

INPUTS

  [ija(2)]: (global) The row and column indices in the distributed matrix in_mat indicating
       the first row and the first column of the submatrix sub(A), respectively.
  [ijc(2)]: (global) The row and column indices in the distributed matrix out_mat
   indicating the first row and the first column of the submatrix sub(C), respectively.
  [free]: True to deallocate in_mat. Default: False
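
  A minimal usage sketch: out_mat receives the conjugate transpose of in_mat, which is then freed:

    call in_mat%ptrans("C", out_mat, free=.true.)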

SOURCE

4687 subroutine slkmat_sp_ptrans(in_mat, trans, out_mat, &
4688                       out_gshape, ija, ijc, size_blocs, alpha, beta, free) ! optional
4689 
4690 !Arguments ------------------------------------
4691  class(slkmat_sp_t),intent(inout) :: in_mat
4692  character(len=1),intent(in) :: trans
4693  class(slkmat_sp_t),intent(inout) :: out_mat
4694  integer,optional,intent(in) :: out_gshape(2), size_blocs(2), ija(2), ijc(2)
4695  complex(sp),optional,intent(in) :: alpha, beta
4696  logical,optional,intent(in) :: free
4697 
4698 !Local variables-------------------------------
4699  integer :: sb, mm, nn, size_blocs__(2)
4700  real(sp) :: ralpha__, rbeta__
4701  integer :: ija__(2), ijc__(2)
4702  complex(sp) :: calpha__, cbeta__
4703 
4704 ! *************************************************************************
4705 
4706  ija__ = [1, 1]; if (present(ija)) ija__ = ija
4707  ijc__ = [1, 1]; if (present(ijc)) ijc__ = ijc
4708 
4709  ! transposed output (sub)matrix has shape (nn, mm)
4710  if (present(out_gshape)) then
4711    nn = out_gshape(1)
4712    mm = out_gshape(2)
4713  else
4714    nn = in_mat%sizeb_global(2)
4715    mm = in_mat%sizeb_global(1)
4716  end if
4717 
4718  if (present(size_blocs)) then
4719    size_blocs__ = size_blocs
4720  else
4721    ! FIXME: This can cause problems if I start to use round-robin distribution in GWR!!!!!
4722    size_blocs__(1) = in_mat%sizeb_global(2)
4723    sb = in_mat%sizeb_global(1) / in_mat%processor%grid%dims(2)
4724    if (mod(in_mat%sizeb_global(1), in_mat%processor%grid%dims(2)) /= 0) sb = sb + 1
4725    size_blocs__(2) = sb
4726    !size_blocs__(2) = in_mat%sizeb_blocs(1); size_blocs__(1) = in_mat%sizeb_blocs(2)
4727  end if
4728 
4729  call out_mat%init(nn, mm, in_mat%processor, in_mat%istwf_k, size_blocs=size_blocs__)
4730 
4731  ! prototype: call pdtran(m, n, alpha, a, ia, ja, desca, beta, c, ic, jc, descc)
4732 
4733  if (allocated(in_mat%buffer_cplx)) then
4734 #ifdef HAVE_LINALG_SCALAPACK
4735    select case (trans)
4736    case ("N")
4737      ! sub(C) := beta*sub(C) + alpha*sub(A)',
4738      calpha__ = cone_sp; if (present(alpha)) calpha__ = alpha
4739      cbeta__ = czero_sp; if (present(beta)) cbeta__ = beta
4740      call pctranu(nn, mm, calpha__, in_mat%buffer_cplx, ija__(1), ija__(2), &
4741                   in_mat%descript%tab, cbeta__, out_mat%buffer_cplx, ijc__(1), ijc__(2), out_mat%descript%tab)
4742 
4743    case ("C")
4744      ! sub(C) := beta * sub(C) + alpha * conjg(sub(A)')
4745      calpha__ = cone_sp; if (present(alpha)) calpha__ = alpha
4746      cbeta__ = czero_sp; if (present(beta)) cbeta__ = beta
4747      call pctranc(nn, mm, calpha__, in_mat%buffer_cplx, ija__(1), ija__(2), &
4748                   in_mat%descript%tab, cbeta__, out_mat%buffer_cplx, ijc__(1), ijc__(2), out_mat%descript%tab)
4749 
4750    case default
4751      ABI_ERROR(sjoin("Invalid value for trans:", trans))
4752    end select
4753 
4754  else if (allocated(in_mat%buffer_real)) then
4755      ralpha__ = one_sp; if (present(alpha)) ralpha__ = real(alpha)
4756      rbeta__ = zero_sp; if (present(beta)) rbeta__ = real(beta)
4757      call pstran(nn, mm, ralpha__, in_mat%buffer_real, ija__(1), ija__(2), &
4758                  in_mat%descript%tab, rbeta__, out_mat%buffer_real, ijc__(1), ijc__(2), out_mat%descript%tab)
4759 #endif
4760  else
4761    ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!")
4762  end if
4763 
4764  if (present(free)) then
4765    if (free) call in_mat%free()
4766  end if
4767 
4768 end subroutine slkmat_sp_ptrans

m_slk/slkmat_sp_set_head_and_wings [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slkmat_sp_set_head_and_wings

FUNCTION

  Set the head and the wings of the matrix starting from global arrays.
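
  A minimal usage sketch: the global element (1,1) is set to head, the first row to up_wing
  and the first column to low_wing (arrays dimensioned as in the interface below):

    call mat%set_head_and_wings(head, low_wing, up_wing)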

SOURCE

 998 subroutine slkmat_sp_set_head_and_wings(mat, head, low_wing, up_wing)
 999 
1000 !Arguments ------------------------------------
1001  class(slkmat_sp_t),intent(inout) :: mat
1002  complex(sp),intent(in) :: head, low_wing(mat%sizeb_global(1)), up_wing(mat%sizeb_global(2))
1003 
1004 !Local variables-------------------------------
1005  integer :: il_g1, il_g2, iglob1, iglob2
1006  logical :: is_cplx
1007 
1008 ! *********************************************************************
1009 
1010  is_cplx = allocated(mat%buffer_cplx)
1011 
1012  do il_g2=1,mat%sizeb_local(2)
1013    iglob2 = mat%loc2gcol(il_g2)
1014    do il_g1=1,mat%sizeb_local(1)
1015      iglob1 = mat%loc2grow(il_g1)
1016 
1017      if (iglob1 == 1 .or. iglob2 == 1) then
1018        if (iglob1 == 1 .and. iglob2 == 1) then
1019          if (is_cplx) then
1020            mat%buffer_cplx(il_g1, il_g2) = head
1021          else
1022            mat%buffer_real(il_g1, il_g2) = real(head)
1023          end if
1024        else if (iglob1 == 1) then
1025          if (is_cplx) then
1026            mat%buffer_cplx(il_g1, il_g2) = up_wing(iglob2)
1027          else
1028            mat%buffer_real(il_g1, il_g2) = real(up_wing(iglob2))
1029          end if
1030        else if (iglob2 == 1) then
1031          if (is_cplx) then
1032            mat%buffer_cplx(il_g1, il_g2) = low_wing(iglob1)
1033          else
1034            mat%buffer_real(il_g1, il_g2) = real(low_wing(iglob1))
1035          end if
1036        end if
1037      end if
1038 
1039    end do
1040  end do
1041 
1042 end subroutine slkmat_sp_set_head_and_wings

m_slk/slkmat_sp_t [ Types ]

[ Top ] [ m_slk ] [ Types ]

NAME

  slkmat_sp_t

FUNCTION

  High-level interface to a ScaLAPACK matrix (single precision version).

SOURCE

287  type, public, extends(basemat_t) :: slkmat_sp_t
288 
289    real(sp),allocatable :: buffer_real(:,:)
290     ! local part of the (real) matrix.
291     ! The istwf_k option passed to the constructor defines whether we have a real or complex matrix
292 
293    complex(sp),allocatable :: buffer_cplx(:,:)
294     ! local part of the (complex) matrix
295 
296  contains
297 
298    procedure :: copy => slkmat_sp_copy
299     ! Copy object
300 
301    procedure :: take_from => slkmat_sp_take_from
302     ! Take values from source
303 
304    procedure :: ptrans => slkmat_sp_ptrans
305     ! Transpose matrix
306 
307    procedure :: set_head_and_wings => slkmat_sp_set_head_and_wings
308     ! Set head and the wings of the matrix starting from global arrays.
309 
310    !procedure :: cut => slk_cut
311     ! Extract submatrix and create new matrix with `size_blocs` and `processor`
312 
313    procedure :: collect_cplx => slkmat_sp_collect_cplx
314     ! Return on all processors the submatrix of shape (mm, nn) starting at position ija.
315 
316    procedure :: heev => slkmat_sp_heev
317     ! Compute eigenvalues and, optionally, eigenvectors of an Hermitian matrix A. A * X = lambda * X
318 
319    procedure :: hpd_invert => slkmat_sp_hpd_invert
320     ! Inverse of a Hermitian positive definite matrix.
321 
322  end type slkmat_sp_t

m_slk/slkmat_sp_take_from [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  slkmat_sp_take_from

FUNCTION

  Take values from source
  NB: This routine should be called by all procs owning mat and source.

INPUTS

  [free]: True if source should be deallocated. Default: False

OUTPUT

SOURCE

5046 subroutine slkmat_sp_take_from(out_mat, source, &
5047                          ija, ijb, free) ! optional
5048 
5049 !Arguments ------------------------------------
5050  class(slkmat_sp_t),intent(inout) :: out_mat
5051  class(slkmat_sp_t),intent(inout) :: source
5052  integer,optional,intent(in) :: ija(2), ijb(2)
5053  logical,optional,intent(in) :: free
5054 
5055 !Local variables-------------------------------
5056  integer :: mm, nn
5057  character(len=500) :: msg
5058  integer :: ija__(2), ijb__(2)
5059 
5060 ! *************************************************************************
5061 
5062  ! prototype
5063  !call pzgemr2d(m, n, a, ia, ja, desca, b, ib, jb, descb, ictxt)
5064 
5065  ! Take care when context A is disjoint from context B. The general rules for which parameters need to be set are:
5066  !
5067  !   - All calling processes must have the correct m and n.
5068  !   - Processes in context A must correctly define all parameters describing A.
5069  !   - Processes in context B must correctly define all parameters describing B.
5070  !   - Processes which are not members of context A must pass ctxt_a = -1 and need not set other parameters describing A.
5071  !   - Processes which are not members of context B must pass ctxt_b = -1 and need not set other parameters describing B.
5072 
5073  mm = source%sizeb_global(1)
5074  nn = source%sizeb_global(2)
5075 
5076  ija__ = [1, 1]; if (present(ija)) ija__ = ija
5077  ijb__ = [1, 1]; if (present(ijb)) ijb__ = ijb
5078 
5079  if (all(out_mat%sizeb_global == -1)) then
5080    out_mat%descript%tab(CTXT_) = -1
5081  else
5082    ABI_CHECK_IEQ(out_mat%istwf_k, source%istwf_k, "istwfk_mat /= istwfk_source")
5083    if (any(out_mat%sizeb_global /= source%sizeb_global)) then
5084      msg = sjoin("Matrices should have same global shape but out_mat:", ltoa(out_mat%sizeb_global), &
5085                  "source:", ltoa(source%sizeb_global))
5086      ABI_ERROR(msg)
5087    end if
5088  end if
5089 
5090  if (allocated(source%buffer_cplx)) then
5091 #ifdef HAVE_LINALG_SCALAPACK
5092    call pcgemr2d(mm, nn,  &
5093                  source%buffer_cplx, ija__(1), ija__(2), source%descript%tab,   &
5094                  out_mat%buffer_cplx, ijb__(1), ijb__(2), out_mat%descript%tab, &
5095                  source%processor%grid%ictxt)
5096 
5097  else if (allocated(source%buffer_real)) then
5098    call psgemr2d(mm, nn,  &
5099                  source%buffer_real, ija__(1), ija__(2), source%descript%tab,   &
5100                  out_mat%buffer_real,ijb__(1), ijb__(2), out_mat%descript%tab, &
5101                  source%processor%grid%ictxt)
5102 #endif
5103  else
5104    ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!")
5105  end if
5106 
5107  if (present(free)) then
5108    if (free) call source%free()
5109  end if
5110 
5111 end subroutine slkmat_sp_take_from

m_slk/solve_gevp_complex [ Functions ]

[ Top ] [ m_slk ] [ Functions ]

NAME

  solve_gevp_complex

FUNCTION

  Calculation of eigenvalues and eigenvectors: A * X = lambda * B * X,
  for both the complex and real cases.

INPUTS

  na= order of the global matrices A and B
  nev= number of eigenvalues (and eigenvectors) to be computed
  na_rows,na_cols= dimensions of the local buffers
  nblk= ScaLAPACK block size
  my_prow,my_pcol= coordinates of this processor in the BLACS grid
  np_rows,np_cols= dimensions of the BLACS processor grid
  sc_desc(9)= ScaLAPACK array descriptor
  comm= MPI communicator
  [use_gpu_elpa]= Flag to activate the use of GPU (ELPA only)

SIDE EFFECTS

  a,b= local parts of the distributed matrices A and B (may be overwritten)
  ev= eigenvalues in ascending order
  z= local part of the distributed eigenvectors
  tmp1,tmp2= workspace arrays

SOURCE

2880 #ifdef HAVE_LINALG_ELPA
2881 
2882 subroutine solve_gevp_complex(na,nev,na_rows,na_cols,nblk,a,b,ev,z,tmp1,tmp2, &
2883 &                             my_prow,my_pcol,np_rows,np_cols,sc_desc,comm,&
2884 &                             use_gpu_elpa) ! Optional parameter
2885 
2886   !-Arguments
2887   integer,intent(in) :: na
2888   integer,intent(in) :: nev
2889   integer,intent(in) :: na_rows,na_cols
2890   integer,intent(in) :: nblk
2891   integer,intent(in) :: my_pcol,my_prow
2892   integer,intent(in) :: np_cols,np_rows
2893   integer,intent(in) :: sc_desc(9)
2894   integer,intent(in) :: comm
2895   integer,optional,intent(in) :: use_gpu_elpa
2896   real*8 :: ev(na)
2897   complex*16 :: a(na_rows,na_cols),b(na_rows,na_cols),z(na_rows,na_cols)
2898   complex*16 :: tmp1(na_rows,na_cols),tmp2(na_rows,na_cols)
2899   !-Local variables
2900   integer :: i, n_col, n_row, use_gpu_elpa_
2901   integer,external :: indxl2g,numroc
2902   complex*16, parameter :: CZERO = (0.d0,0.d0), CONE = (1.d0,0.d0)
2903   type(elpa_hdl_t) :: elpa_hdl
2904 
2905 ! *************************************************************************
2906   
2907   use_gpu_elpa_=0
2908 #ifdef HAVE_LINALG_ELPA
2909   if (present(use_gpu_elpa)) use_gpu_elpa_=use_gpu_elpa
2910 #endif
2911 
2912 ! Allocate ELPA handle
2913   call elpa_func_allocate(elpa_hdl,blacs_ctx=sc_desc(CTXT_),gpu=use_gpu_elpa_)
2914   call elpa_func_set_matrix(elpa_hdl,na,nblk,nev,na_rows,na_cols)
2915   call elpa_func_get_communicators(elpa_hdl,comm,my_prow,my_pcol)
2916 
2917   call elpa_func_solve_gevp_2stage(elpa_hdl,a,b,z,ev,nev)
2918 
2919   call elpa_func_deallocate(elpa_hdl)
2920 
2921 end subroutine solve_gevp_complex
2922 
2923 !----------------------------------------------------------------------
2924 
2925 subroutine solve_gevp_real(na,nev,na_rows,na_cols,nblk,a,b,ev,z,tmp1,tmp2, &
2926 &                          my_prow,my_pcol,np_rows,np_cols,sc_desc,comm, &
2927 &                          use_gpu_elpa) ! Optional argument
2928 
2929   !-Arguments
2930   integer,intent(in) :: na
2931   integer,intent(in) :: nev
2932   integer,intent(in) :: na_rows,na_cols
2933   integer,intent(in) :: nblk
2934   integer,intent(in) :: my_pcol,my_prow
2935   integer,intent(in) :: np_cols,np_rows
2936   integer,intent(in) :: sc_desc(9)
2937   integer,intent(in) :: comm
2938   integer,optional,intent(in) :: use_gpu_elpa
2939   real*8 :: ev(na)
2940   real*8 :: a(na_rows,na_cols),b(na_rows,na_cols),z(na_rows,na_cols)
2941   real*8::tmp1(na_rows,na_cols),tmp2(na_rows,na_cols)
2942   !-Local variables
2943   integer :: i, n_col, n_row, use_gpu_elpa_
2944   integer,external :: indxl2g,numroc
2945   type(elpa_hdl_t) :: elpa_hdl
2946 
2947 ! *************************************************************************
2948 
2949   use_gpu_elpa_=0
2950 #ifdef HAVE_LINALG_ELPA
2951   if (present(use_gpu_elpa)) use_gpu_elpa_=use_gpu_elpa
2952 #endif
2953 
2954 ! Allocate ELPA handle
2955   call elpa_func_allocate(elpa_hdl,blacs_ctx=sc_desc(CTXT_),gpu=use_gpu_elpa_)
2956   call elpa_func_set_matrix(elpa_hdl,na,nblk,nev,na_rows,na_cols)
2957   call elpa_func_get_communicators(elpa_hdl,comm,my_prow,my_pcol)
2958 
2959   !FIXME Need to figure out why generalized_eigenvectors doesn't work in this real case
2960   !      while it is fine with complex case
2961   if(.false.) then
2962     call elpa_func_solve_gevp_2stage(elpa_hdl,a,b,tmp1,ev,nev)
2963   else
2964     ! 1. Calculate Cholesky factorization of Matrix B = U**T * U
2965     !    and invert triangular matrix U
2966     call elpa_func_cholesky(elpa_hdl,b)
2967     call elpa_func_invert_triangular(elpa_hdl,b)
2968     ! 2. Calculate U**-T * A * U**-1
2969     ! 2a. tmp1 = U**-T * A
2970     call elpa_func_hermitian_multiply(elpa_hdl,'U','L',na,b,a,na_rows,na_cols,tmp1,na_rows,na_cols)
2971     ! 2b. tmp2 = tmp1**T
2972     call pdtran(na,na,1.d0,tmp1,1,1,sc_desc,0.d0,tmp2,1,1,sc_desc)
2973     ! 2c. A =  U**-T * tmp2 ( = U**-T * Aorig * U**-1 )
2974     call elpa_func_hermitian_multiply(elpa_hdl,'U','U',na,b,tmp2,na_rows,na_cols,a,na_rows,na_cols)
2975     ! A is only set in the upper half, solve_evp_real needs a full matrix
2976     ! Set lower half from upper half
2977     call pdtran(na,na,1.d0,a,1,1,sc_desc,0.d0,tmp1,1,1,sc_desc)
2978     do i=1,na_cols
2979        ! Get global column corresponding to i and number of local rows up to
2980        ! and including the diagonal, these are unchanged in A
2981        n_col = indxl2g(i,     nblk, my_pcol, 0, np_cols)
2982        n_row = numroc (n_col, nblk, my_prow, 0, np_rows)
2983        a(n_row+1:na_rows,i) = tmp1(n_row+1:na_rows,i)
2984     enddo
2985     ! 3. Calculate eigenvalues/eigenvectors of U**-T * A * U**-1
2986     !    Eigenvectors go to tmp1
2987     call elpa_func_solve_evp_1stage(elpa_hdl,a,tmp1,ev,nev)
2988     ! 4. Backtransform eigenvectors: Z = U**-1 * tmp1
2989     !    hermitian_multiply needs the transpose of U**-1, thus tmp2 = (U**-1)**T
2990     call pdtran(na,na,1.d0,b,1,1,sc_desc,0.d0,tmp2,1,1,sc_desc)
2991     call elpa_func_hermitian_multiply(elpa_hdl,'L','N',nev,tmp2,tmp1,na_rows,na_cols,z,na_rows,na_cols)
2992   end if
2993 
2994   call elpa_func_deallocate(elpa_hdl)
2995 
2996  end subroutine solve_gevp_real