TABLE OF CONTENTS


ABINIT/m_gwls_QR_factorization [ Modules ]

[ Top ] [ Modules ]

NAME

 m_gwls_QR_factorization

FUNCTION

  .

COPYRIGHT

 Copyright (C) 2009-2018 ABINIT group (JLJ, BR, MC)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

PARENTS

CHILDREN

SOURCE

21 #if defined HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include "abi_common.h"
26 
27 
28 
29 module m_gwls_QR_factorization
30 !----------------------------------------------------------------------------------------------------
31 ! This module implements the QR factorization using various algorithms, for the specific
32 ! data distribution corresponding to FFT parallelism.
33 !
34 ! There are standard routines which do this (lapack, scalapack, etc), however it is complicated
35 ! to get scalapack to run properly in parallel. Implementing ourselves is the shortest path to
36 ! a working solution.
37 !----------------------------------------------------------------------------------------------------
38 !local modules
39 use m_gwls_utility
40 use m_gwls_TimingLog
41 use m_gwls_wf
42 use m_gwls_hamiltonian
43 
44 !abinit modules
45 use defs_basis
46 use defs_datatypes
47 use defs_abitypes
48 use defs_wvltypes
49 use m_abicore
50 use m_xmpi
51 use m_errors
52 
53 use m_io_tools,  only : get_unit
54 use m_time,      only : timab
55 
56 
57 implicit none
58 save
59 private

m_hamiltonian/extract_QR [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  extract_QR

FUNCTION

  .

INPUTS

OUTPUT

PARENTS

      gwls_GWlanczos,gwls_QR_factorization

CHILDREN

      xmpi_allgather,xmpi_sum

SOURCE

 89 subroutine extract_QR(mpi_communicator,Hsize,Xsize,Xmatrix,Rmatrix)
 90 !--------------------------------------------------------------------------
 91 ! This function computes the QR factorization:
 92 !
 93 !                X = Q . R
 94 ! 
 95 ! in order to extract the matrix of orthonormal vectors Q and 
 96 ! the R matrix.
 97 !
 98 ! On output, the matrix X is replaced by Q.
 99 !
100 ! If the code is running with only one processor, this routine 
101 ! simply invokes extract_QR_serial, which wraps standard Lapack routines.
102 ! If we are running in MPI parallel, the serial Lapack routines no 
103 ! longer work (and understanding scalapack is too complicated right now).
104 ! Thus, in that case, this routine implements some old school Gram-Schmidt
105 ! algorithm.
106 !--------------------------------------------------------------------------
107 
108 !This section has been created automatically by the script Abilint (TD).
109 !Do not modify the following lines by hand.
110 #undef ABI_FUNC
111 #define ABI_FUNC 'extract_QR'
112 !End of the abilint section
113 
114 implicit none
115 
116 integer,        intent(in) :: Hsize, Xsize, mpi_communicator
117 complex(dpc),intent(inout) :: Xmatrix(Hsize,Xsize)  
118 
119 complex(dpc),  intent(out),optional :: Rmatrix(Xsize,Xsize)  
120 
121 ! local variables
122 
123 real(dp) :: tsec(2)
124 integer :: GWLS_TIMAB, OPTION_TIMAB
125 
126 ! *************************************************************************
127 
128 
129 
130 GWLS_TIMAB   = 1519
131 OPTION_TIMAB = 1
132 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
133 
134 
135 !--------------------------------------------------------------------------------
136 ! Implement Gram-Schmidt.
137 !--------------------------------------------------------------------------------
138 call extract_QR_Householder(mpi_communicator,Hsize,Xsize,Xmatrix,Rmatrix)
139 
140 OPTION_TIMAB = 2
141 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
142 
143 end subroutine extract_QR

m_hamiltonian/extract_QR_Householder [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  extract_QR_Householder

FUNCTION

  .

INPUTS

OUTPUT

PARENTS

      gwls_QR_factorization

CHILDREN

      xmpi_allgather,xmpi_sum

SOURCE

425 subroutine extract_QR_Householder(mpi_communicator,Hsize,Xsize,Xmatrix,Rmatrix)
426 !--------------------------------------------------------------------------
427 ! This function computes the QR factorization:
428 !
429 !                X = Q . R
430 ! 
431 ! in order to extract the matrix of orthonormal vectors Q and 
432 ! the R matrix.
433 !
434 ! On output, the matrix X is replaced by Q.
435 !
436 ! This routine uses Householder operations to generate Q and R.
437 ! Special attention is given to the fact that the matrix may be 
438 ! distributed across processors and MPI communication is necessary.
439 ! 
440 !--------------------------------------------------------------------------
441 
442 !This section has been created automatically by the script Abilint (TD).
443 !Do not modify the following lines by hand.
444 #undef ABI_FUNC
445 #define ABI_FUNC 'extract_QR_Householder'
446 !End of the abilint section
447 
448 implicit none
449 
450 integer,        intent(in) :: Hsize, Xsize, mpi_communicator
451 complex(dpc),intent(inout) :: Xmatrix(Hsize,Xsize)  
452 
453 complex(dpc),  intent(out),optional :: Rmatrix(Xsize,Xsize)  
454 
455 ! local variables
456 integer        :: numbrer_of_plane_waves 
457 
458 integer        :: io_unit
459 integer        :: ierr
460 character(50)  :: filename
461 logical        :: file_exists
462 
463 integer,save :: counter = 0
464 
465 integer :: i, j, l_local
466 integer :: l1, l2
467 
468 integer, allocatable      :: nproc_array(:)
469 
470 complex(dpc), allocatable :: Qinternal(:,:)
471 complex(dpc), allocatable :: Rinternal(:,:)
472 complex(dpc), allocatable :: vj(:)
473 
474 complex(dpc), allocatable :: A_matrix(:,:)
475 complex(dpc), allocatable :: V_matrix(:,:)
476 
477 complex(dpc), allocatable :: list_beta(:)
478 
479 complex(dpc) :: cmplx_value
480 real   (dp ) :: real_value
481 
482 complex(dpc) :: norm_x
483 complex(dpc) :: phase
484 
485 complex(dpc), allocatable :: error(:,:)
486 complex(dpc), allocatable :: coeff(:)
487 
488 
489 integer :: mpi_rank
490 integer :: mpi_nproc
491 logical :: head_node    
492 
493 ! *************************************************************************
494 
495 
496 !--------------------------------------------------------------------------------
497 ! Implement Householder algorithm, in parallel
498 !--------------------------------------------------------------------------------
499 mpi_nproc        = xmpi_comm_size(mpi_communicator)
500 
501 ! extract the rank of this processor in the communicator
502 mpi_rank         = xmpi_comm_rank(mpi_communicator)
503 
504 ! only head node will write!
505 head_node        = mpi_rank == 0
506 
507 
508 !--------------------------------------------------------------------------------
509 ! Open a log file for the output of extract_QR
510 !--------------------------------------------------------------------------------
511 if (debug .and.  head_node ) then
512 
513   io_unit  = get_unit()
514   write(filename,'(A,I0.4,A)') "extract_QR_",mpi_rank,".log"
515   inquire(file=trim(filename),exist=file_exists)
516 
517   if (file_exists) then
518     open(io_unit,file=trim(filename),position='append',status=files_status_old)
519   else
520     open(io_unit,file=trim(filename),status=files_status_new)
521     write(io_unit,10) "#======================================================================================="
522     write(io_unit,10) "#                                                                                       "
523     write(io_unit,10) "#   This file contains information regarding the QR factorization, from extract_QR      "
524     write(io_unit,25) "#   The algorithm is running in MPI parallel with ",mpi_nproc," processors"
525     write(io_unit,10) "#                                                                                       "
526     write(io_unit,10) "#======================================================================================="
527   end if        
528 
529   counter = counter + 1
530 
531   write(io_unit,10) "#                                                                                       "
532   write(io_unit,11) "#   Call # ", counter
533   write(io_unit,10) "#                                                                                       "
534   write(io_unit,11) "#                        Hsize    = ",Hsize
535   write(io_unit,11) "#                        Xsize    = ",Xsize
536   write(io_unit,13) "#                Rmatrix present? = ",present(Rmatrix)
537 
538 
539 end if
540 
541 !--------------------------------------------------------------------------------
542 ! Get the number of plane waves on every processor
543 !--------------------------------------------------------------------------------
544 ABI_ALLOCATE(nproc_array,(mpi_nproc))
545 
546 nproc_array = 0
547 
548 numbrer_of_plane_waves = Hsize ! do this to avoid "intent" problems
549 call xmpi_allgather(numbrer_of_plane_waves, nproc_array, mpi_communicator, ierr)
550 
551 
552 !--------------------------------------------------------------------------------
553 ! Get the offset for each processor
554 !
555 ! The global index is then given by
556 !                   I_{global} = nproc_array(1+rank)+i_{local}
557 !
558 ! similarly, the local index is given by
559 !                  i_{local} = I_{global} - nproc_array(1+rank)
560 ! which is only meaningful if 1 <= i_{local} <= Hsize
561 !--------------------------------------------------------------------------------
562 
563 do j = mpi_nproc, 1, -1
564 nproc_array(j) = 0
565 do i = 1, j-1
566 nproc_array(j) = nproc_array(j)+nproc_array(i)
567 end do
568 end do 
569 
570 
571 !--------------------------------------------------------------------------------
572 ! Act on the A matrix, following the book by Golub  (more or less ;) )
573 !
574 !--------------------------------------------------------------------------------
575 
576 ABI_ALLOCATE(A_matrix, (Hsize,Xsize))
577 ABI_ALLOCATE(V_matrix, (Hsize,Xsize))
578 ABI_ALLOCATE(list_beta, (Xsize))
579 ABI_ALLOCATE(coeff   , (Xsize))
580 
581 A_matrix(:,:) = Xmatrix(:,:)
582 V_matrix(:,:) = cmplx_0
583 list_beta(:)  = cmplx_0
584 
585 
586 ABI_ALLOCATE(vj, (Hsize))
587 
588 do j = 1, Xsize
589 
590 ! Store xj in vj, for now
591 vj(:) = A_matrix(:,j)
592 
593 
594 if (j > 1) then
595   !------------------------------------------
596   ! set the array to zero all the way to j-1
597   !------------------------------------------
598   l_local = j-1-nproc_array(1+mpi_rank)
599 
600   if ( l_local > Hsize) then
601     vj(:) = cmplx_0
602   else if ( l_local <= Hsize  .and. l_local >= 1) then
603     vj(1:l_local) = cmplx_0
604   end if
605 
606 end if
607 
608 ! compute the norm of x
609 norm_x = sum(conjg(vj(:))*vj(:))
610 call xmpi_sum(norm_x,mpi_communicator,ierr) ! sum on all processors in communicator
611 norm_x = sqrt(norm_x)
612 
613 
614 if (abs(norm_x) > tol14) then
615   ! if |x| ~ 0, there is nothing to do! the column in A is full of zeros.
616 
617   ! find the j^th component of x
618   l_local = j-nproc_array(1+mpi_rank)
619 
620   ! update vj, on the right processor!
621   if ( l_local <= Hsize  .and. l_local >= 1) then
622 
623     phase = vj(l_local) 
624 
625     if (abs(phase) < tol14) then
626       phase = cmplx_1
627     else
628       phase = phase/abs(phase)
629     end if
630 
631     vj(l_local) = vj(l_local) + phase*norm_x
632 
633   end if
634 
635   !compute beta
636   cmplx_value = sum(conjg(vj(:))*vj(:))
637   call xmpi_sum(cmplx_value,mpi_communicator,ierr) ! sum on all processors
638 
639   list_beta(j) = 2.0_dp/cmplx_value
640 
641   ! store v for later use; this is less efficient than storing it in the null part of A,
642   ! but it is less of a pain to implement. Feel free to clean this up.
643   V_matrix(:,j) = vj(:)
644 
645 
646   ! Update the A matrix
647 
648   ! Compute v^dagger . A
649   !call ZGEMM(            'C',   & ! Hermitian conjugate the first array
650   !                       'N',   & ! Leave second array as is
651   !                         1,   & ! the number of rows of the  matrix op( A )
652   !                 Xsize-j+1,   & ! the number of columns of the  matrix op( B )
653   !                     Hsize,   & ! the number of columns of the  matrix op( A ) == rows of matrix op( B )
654   !                   cmplx_1,   & ! alpha constant
655   !                        vj,   & ! matrix A
656   !                     Hsize,   & ! LDA
657   !       A_matrix(:,j:Xsize),   & ! matrix B
658   !                     Hsize,   & ! LDB
659   !                   cmplx_0,   & ! beta constant
660   !          coeff(:,j:Xsize),   & ! matrix C
661   !                         1)     ! LDC
662   do i = j, Xsize
663   coeff(i) = sum(conjg(vj)*A_matrix(:,i))
664   end do
665   call xmpi_sum(coeff,mpi_communicator,ierr) ! sum on all processors in the communicator
666 
667   ! update A
668   do i = j, Xsize
669   A_matrix(:,i) = A_matrix(:,i) - list_beta(j)*coeff(i)*vj(:)
670   end do
671 
672 end if
673 
674 end do
675 
676 !--------------------------------------------------------------------------------
677 ! Extract the R matrix
678 !
679 !--------------------------------------------------------------------------------
680 
681 ABI_ALLOCATE(Rinternal,(Xsize,Xsize))
682 
683 Rinternal = cmplx_0
684 
685 
686 do j = 1, Xsize
687 do i = 1, j
688 l_local = i-nproc_array(1+mpi_rank)
689 
690 if ( l_local <= Hsize  .and. l_local >= 1) then
691   Rinternal(i,j) = A_matrix(l_local,j) 
692 end if
693 
694 end do ! i
695 end do ! j
696 
697 call xmpi_sum(Rinternal,mpi_communicator,ierr) ! sum on all processors
698 
699 
700 
701 !--------------------------------------------------------------------------------
702 ! Extract the Q matrix
703 !
704 !--------------------------------------------------------------------------------
705 
706 ABI_ALLOCATE( Qinternal, (Hsize,Xsize))
707 
708 ! initialize Q to the identity in the top corner
709 Qinternal = cmplx_0
710 
711 do j = 1, Xsize
712 l_local = j-nproc_array(1+mpi_rank)
713 
714 if ( l_local <= Hsize  .and. l_local >= 1 ) then
715   Qinternal(l_local,j) = cmplx_1
716 end if
717 
718 end do ! j
719 
720 
721 ! Build Q interatively
722 do j = Xsize,1, -1
723 
724 vj(:) = V_matrix(:,j) 
725 
726 
727 ! Update the A matrix
728 
729 ! Compute v^dagger . A
730 !call ZGEMM(            'C',   & ! Hermitian conjugate the first array
731 !                       'N',   & ! Leave second array as is
732 !                         1,   & ! the number of rows of the  matrix op( A )
733 !                     Xsize,   & ! the number of columns of the  matrix op( B )
734 !                     Hsize,   & ! the number of columns of the  matrix op( A ) == rows of matrix op( B )
735 !                   cmplx_1,   & ! alpha constant
736 !                        vj,   & ! matrix A
737 !                     Hsize,   & ! LDA
738 !                 Qinternal,   & ! matrix B
739 !                     Hsize,   & ! LDB
740 !                   cmplx_0,   & ! beta constant
741 !                     coeff,   & ! matrix C
742 !                         1)     ! LDC
743 
744 do i = 1, Xsize
745 coeff(i) = sum(conjg(vj)*Qinternal(:,i))
746 end do
747 call xmpi_sum(coeff,mpi_communicator,ierr) ! sum on all processors in communicator
748 
749 
750 ! update Q
751 do i = 1, Xsize
752 Qinternal(:,i) = Qinternal(:,i) - list_beta(j)*coeff(i)*vj(:)
753 end do
754 
755 end do ! j
756 
757 
758 
759 ! clean up
760 ABI_DEALLOCATE(V_matrix)
761 ABI_DEALLOCATE(coeff)
762 ABI_DEALLOCATE(vj)
763 
764 !--------------------------------------------------------------------------------
765 ! Do some debug, if requested
766 !
767 !--------------------------------------------------------------------------------
768 
769 if (debug ) then
770 
771   if ( head_node ) then
772 
773     write(io_unit,20) "#    nproc_array   = ",nproc_array
774     flush(io_unit)
775 
776     write(io_unit,40) "#    list_beta     = ",real(list_beta)
777     flush(io_unit)
778   end if
779 
780 
781   ABI_ALLOCATE(error,(Xsize,Xsize))
782 
783   error = cmplx_0
784 
785   do l2=1,Xsize
786   error(l2,l2) = error(l2,l2) - cmplx_1
787   do l1=1,Xsize
788 
789   cmplx_value = complex_vector_product(Qinternal(:,l1),Qinternal(:,l2),Hsize)
790   call xmpi_sum(cmplx_value,mpi_communicator,ierr) ! sum on all processors working on FFT! 
791 
792   error(l1,l2) = error(l1,l2)+cmplx_value
793 
794   end do
795   end do
796 
797   if ( head_node ) then
798     write(io_unit,12) "#               || Q^t.Q - I ||   = ",sqrt(sum(abs(error(:,:))**2))
799     flush(io_unit)
800   end if
801 
802 
803   ABI_DEALLOCATE(error)
804 
805   ABI_ALLOCATE(error,(Hsize,Xsize))
806 
807   error = Xmatrix
808 
809   do l2=1,Xsize
810   do l1=1,Xsize
811   error(:,l2) = error(:,l2) - Qinternal(:,l1)*Rinternal(l1,l2)
812   end do
813   end do
814 
815   real_value = zero
816   do l2=1,Xsize
817   do l1=1,Xsize
818   cmplx_value = complex_vector_product(error(:,l1),error(:,l2),Hsize)
819 
820   real_value  = real_value + abs(cmplx_value)**2
821   end do
822   end do
823 
824   call xmpi_sum(real_value,mpi_communicator,ierr) ! sum on all processors 
825 
826 
827   real_value = sqrt(real_value)
828 
829   if ( head_node) then
830     write(io_unit,12) "#               || Xin - Q.R ||   = ",real_value 
831 
832     if ( real_value > 1.0D-10 ) write(io_unit,10) "#               ERROR!              "
833 
834     write(io_unit,10) '#'
835     write(io_unit,10) '# R matrix'
836     write(io_unit,10) '#'
837     do l1=1, Xsize
838     write(io_unit,30) Rinternal(l1,:)
839     end do
840     write(io_unit,10) ''
841     write(io_unit,10) ''
842 
843 
844     write(io_unit,10) '#'
845     write(io_unit,10) '# top of A matrix'
846     write(io_unit,10) '#'
847     do l1=1, 2*Xsize
848     write(io_unit,30) A_matrix(l1,:)
849     end do
850     write(io_unit,10) ''
851     write(io_unit,10) ''
852 
853     close(io_unit)
854 
855   end if
856 
857   ABI_DEALLOCATE(error)
858 
859 end if
860 
861 !--------------------------------------------------------------------------------
862 ! Final assignment
863 !
864 !--------------------------------------------------------------------------------
865 
866 Xmatrix = Qinternal
867 
868 if (present(Rmatrix)) then
869   Rmatrix = Rinternal
870 end if
871 
872 ABI_DEALLOCATE(Qinternal)
873 ABI_DEALLOCATE(Rinternal)
874 ABI_DEALLOCATE(nproc_array)
875 ABI_DEALLOCATE(A_matrix)
876 ABI_DEALLOCATE(list_beta)
877 
878 
879 10 format(A)
880 11 format(A,I8)
881 12 format(A,E24.16)
882 13 format(A,2X,L10)
883 20 format(A,1000I10)
884 25 format(A,I5,A)
885 30 format(1000(F22.16,2X,F22.16,5X))
886 40 format(A,1000(F22.16,5X))
887 
888 end subroutine extract_QR_Householder

m_hamiltonian/extract_SVD [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  extract_SVD

FUNCTION

  .

INPUTS

OUTPUT

PARENTS

      gwls_DielectricArray

CHILDREN

      xmpi_allgather,xmpi_sum

SOURCE

166 subroutine extract_SVD(mpi_communicator, Hsize,lsolutions_max,svd_matrix,svd_values)
167 !--------------------------------------------------------------------------
168 ! This function computes the singular value decomposition
169 !
170 !                X = U . SIGMA . V^dagger 
171 ! 
172 ! More specifically,  the matrix U of orthonormal vectors and SIGMA
173 ! the eigenvalues are returned.
174 ! 
175 ! different algorithms are used, depending on parallelisation scheme.
176 !--------------------------------------------------------------------------
177 
178 !This section has been created automatically by the script Abilint (TD).
179 !Do not modify the following lines by hand.
180 #undef ABI_FUNC
181 #define ABI_FUNC 'extract_SVD'
182 !End of the abilint section
183 
184 implicit none
185 
186 integer,      intent(in)    :: mpi_communicator
187 integer,      intent(in)    :: Hsize, lsolutions_max
188 complex(dpc), intent(inout) :: svd_matrix(Hsize,lsolutions_max)
189 real(dp),     intent(out)   :: svd_values(lsolutions_max)
190 
191 
192 complex(dpc), allocatable   :: Rmatrix(:,:)
193 complex(dpc), allocatable   :: svd_tmp(:,:)
194 
195 real(dp) :: tsec(2)
196 integer :: GWLS_TIMAB, OPTION_TIMAB
197 
198 ! *************************************************************************
199 
200 
201 
202 GWLS_TIMAB   = 1520
203 OPTION_TIMAB = 1
204 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
205 
206 
207 !if ( mpi_enreg%nproc_fft ==1 ) then
208 if ( .false. ) then
209 
210   call extract_SVD_lapack(Hsize,lsolutions_max,svd_matrix,svd_values)
211 
212 else
213 
214   ABI_ALLOCATE(Rmatrix,(lsolutions_max,lsolutions_max))
215 
216   ! perform QR first
217   call extract_QR(mpi_communicator, Hsize,lsolutions_max,svd_matrix,Rmatrix)
218 
219   ! perform SVD on the much smaller Rmatrix!
220   call extract_SVD_lapack(lsolutions_max,lsolutions_max,Rmatrix,svd_values)
221 
222   ABI_ALLOCATE(svd_tmp,(Hsize,lsolutions_max))
223 
224   ! Rmatrix is overwritten with U matrix from SVD. Update the svd_matrix
225   call ZGEMM(            'N',   & ! Leave first array as is
226   'N',   & ! Leave second array as is
227   Hsize,   & ! the number of rows of the  matrix op( A )
228   lsolutions_max,   & ! the number of columns of the  matrix op( B )
229   lsolutions_max,   & ! the number of columns of the  matrix op( A ) == rows of matrix op( B )
230   cmplx_1,   & ! alpha constant
231   svd_matrix,   & ! matrix A
232   Hsize,   & ! LDA
233   Rmatrix,   & ! matrix B
234   lsolutions_max,   & ! LDB
235   cmplx_0,   & ! beta constant
236   svd_tmp,   & ! matrix C
237   Hsize)     ! LDC
238 
239   svd_matrix(:,:) = svd_tmp(:,:)
240 
241   ABI_DEALLOCATE(svd_tmp)
242   ABI_DEALLOCATE(Rmatrix)
243 end if
244 OPTION_TIMAB = 2
245 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
246 
247 
248 end subroutine extract_SVD

m_hamiltonian/extract_SVD_lapack [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  extract_SVD_lapack

FUNCTION

  .

INPUTS

OUTPUT

PARENTS

      gwls_QR_factorization

CHILDREN

      xmpi_allgather,xmpi_sum

SOURCE

270 subroutine extract_SVD_lapack(Hsize,lsolutions_max,svd_matrix,svd_values)
271 !--------------------------------------------------------------------------
272 ! This function computes the singular value decomposition
273 ! using lapack routines. This is not appropriate in MPI parallel!
274 ! 
275 ! 
276 !--------------------------------------------------------------------------
277 
278 !This section has been created automatically by the script Abilint (TD).
279 !Do not modify the following lines by hand.
280 #undef ABI_FUNC
281 #define ABI_FUNC 'extract_SVD_lapack'
282 !End of the abilint section
283 
284 implicit none
285 
286 
287 integer,      intent(in)    :: Hsize, lsolutions_max
288 complex(dpc), intent(inout) :: svd_matrix(Hsize,lsolutions_max)
289 real(dp),     intent(out)   :: svd_values(lsolutions_max)
290 
291 
292 
293 integer                   :: info_zgesvd
294 integer                   :: lwork_svd
295 complex(dpc), allocatable :: work_svd(:)
296 complex(dpc), allocatable :: svd_U(:,:), svd_V(:,:)
297 real   (dp ), allocatable :: rwork_svd(:)
298 
299 integer        :: debug_unit
300 character(50)  :: debug_filename
301 
302 ! *************************************************************************
303 
304 
305 
306 
307 ! allocate arrays for the svd
308 ABI_ALLOCATE(svd_U                  ,(1,1))
309 ABI_ALLOCATE(svd_V                  ,(1,1))
310 
311 
312 ! DIMENSION QUERRY for singluar decomposition problem
313 
314 ABI_ALLOCATE(rwork_svd    ,(5*min(Hsize,lsolutions_max)))
315 ABI_ALLOCATE(work_svd,(1))
316 lwork_svd = -1
317 
318 call zgesvd('O',            & ! The first min(m,n) columns of U (the left singular vectors) are overwritten on the array A;
319 'N',            & ! no column vectors of V are computed
320 Hsize,          & ! number of rows of the matrix
321 lsolutions_max, & ! number of columns of the matrix
322 svd_matrix,     & ! matrix to be decomposed
323 Hsize,          & ! LDA
324 svd_values,     & ! singular values
325 svd_U,          & ! dummy U; not referenced
326 1,              & ! size of U
327 svd_V,          & ! dummy V; not referenced
328 1,              & ! size of V
329 work_svd,       & ! work array
330 lwork_svd,      & ! size of work array
331 rwork_svd,      & ! work array
332 info_zgesvd )
333 
334 if ( info_zgesvd /= 0) then        
335   debug_unit = get_unit()
336   write(debug_filename,'(A,I4.4,A)') 'LAPACK_DEBUG_PROC=',mpi_enreg%me,'.log'
337 
338   open(debug_unit,file=trim(debug_filename),status='unknown')
339 
340   write(debug_unit,'(A)')      '*********************************************************************************************'
341   write(debug_unit,'(A,I4,A)') '*      ERROR: info = ',info_zgesvd,' in ZGESVD(1), gwls_QR_factorization'
342   write(debug_unit,'(A)')      '*********************************************************************************************'
343 
344   close(debug_unit)
345 
346 end if
347 
348 
349 
350 
351 
352 
353 lwork_svd = nint(dble(work_svd(1)))
354 
355 ABI_DEALLOCATE(work_svd)
356 
357 ABI_ALLOCATE(work_svd,(lwork_svd))
358 
359 ! computation run
360 
361 call zgesvd('O',            & ! The first min(m,n) columns of U (the left singular vectors) are overwritten on the array A;
362 'N',            & ! no column vectors of V are computed
363 Hsize,          & ! number of rows of the matrix
364 lsolutions_max, & ! number of columns of the matrix
365 svd_matrix,     & ! matrix to be decomposed
366 Hsize,          & ! LDA
367 svd_values,     & ! singular values
368 svd_U,          & ! dummy U; not referenced
369 1,              & ! size of U
370 svd_V,          & ! dummy V; not referenced
371 1,              & ! size of V
372 work_svd,       & ! work array
373 lwork_svd,      & ! size of work array
374 rwork_svd,      & ! work array
375 info_zgesvd )
376 
377 if ( info_zgesvd /= 0) then        
378   debug_unit = get_unit()
379   write(debug_filename,'(A,I4.4,A)') 'LAPACK_DEBUG_PROC=',mpi_enreg%me,'.log'
380 
381   open(debug_unit,file=trim(debug_filename),status='unknown')
382 
383   write(debug_unit,'(A)')      '*********************************************************************************************'
384   write(debug_unit,'(A,I4,A)') '*      ERROR: info = ',info_zgesvd,' in ZGESVD(2), gwls_QR_factorization'
385   write(debug_unit,'(A)')      '*********************************************************************************************'
386 
387   close(debug_unit)
388 
389 end if
390 
391 
392 
393 
394 ABI_DEALLOCATE(work_svd)
395 ABI_DEALLOCATE(rwork_svd)
396 ABI_DEALLOCATE(svd_U)
397 ABI_DEALLOCATE(svd_V)
398 
399 end subroutine extract_SVD_lapack