TABLE OF CONTENTS
- ABINIT/gwls_QR_factorization
- m_hamiltonian/extract_QR
- m_hamiltonian/extract_QR_Householder
- m_hamiltonian/extract_SVD
- m_hamiltonian/extract_SVD_lapack
ABINIT/gwls_QR_factorization [ Modules ]
NAME
gwls_QR_factorization
FUNCTION
.
COPYRIGHT
Copyright (C) 2009-2018 ABINIT group (JLJ, BR, MC) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
PARENTS
CHILDREN
SOURCE
21 #if defined HAVE_CONFIG_H 22 #include "config.h" 23 #endif 24 25 #include "abi_common.h" 26 27 28 29 module gwls_QR_factorization 30 !---------------------------------------------------------------------------------------------------- 31 ! This module implements the QR factorization using various algorithms, for the specific 32 ! data distribution corresponding to FFT parallelism. 33 ! 34 ! There are standard routines which do this (lapack, scalapack, etc), however it is complicated 35 ! to get scalapack to run properly in parallel. Implementing ourselves is the shortest path to 36 ! a working solution. 37 !---------------------------------------------------------------------------------------------------- 38 !local modules 39 use m_gwls_utility 40 use gwls_TimingLog 41 use gwls_wf 42 use gwls_hamiltonian 43 44 !abinit modules 45 use defs_basis 46 use defs_datatypes 47 use defs_abitypes 48 use defs_wvltypes 49 use m_profiling_abi 50 use m_xmpi 51 use m_errors 52 use m_io_tools, only : get_unit 53 54 55 implicit none 56 save 57 private
m_hamiltonian/extract_QR [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
extract_QR
FUNCTION
.
INPUTS
OUTPUT
PARENTS
gwls_GWlanczos,gwls_QR_factorization
CHILDREN
xmpi_allgather,xmpi_sum
SOURCE
87 subroutine extract_QR(mpi_communicator,Hsize,Xsize,Xmatrix,Rmatrix) 88 !-------------------------------------------------------------------------- 89 ! This function computes the QR factorization: 90 ! 91 ! X = Q . R 92 ! 93 ! in order to extract the matrix of orthonormal vectors Q and 94 ! the R matrix. 95 ! 96 ! On output, the matrix X is replaced by Q. 97 ! 98 ! If the code is running with only one processor, this routine 99 ! simply invokes extract_QR_serial, which wraps standard Lapack routines. 100 ! If we are running in MPI parallel, the serial Lapack routines no 101 ! longer work (and understanding scalapack is too complicated right now). 102 ! Thus, in that case, this routine implements some old school Gram-Schmidt 103 ! algorithm. 104 !-------------------------------------------------------------------------- 105 106 !This section has been created automatically by the script Abilint (TD). 107 !Do not modify the following lines by hand. 108 #undef ABI_FUNC 109 #define ABI_FUNC 'extract_QR' 110 use interfaces_18_timing 111 !End of the abilint section 112 113 implicit none 114 115 integer, intent(in) :: Hsize, Xsize, mpi_communicator 116 complex(dpc),intent(inout) :: Xmatrix(Hsize,Xsize) 117 118 complex(dpc), intent(out),optional :: Rmatrix(Xsize,Xsize) 119 120 ! local variables 121 122 real(dp) :: tsec(2) 123 integer :: GWLS_TIMAB, OPTION_TIMAB 124 125 ! ************************************************************************* 126 127 128 129 GWLS_TIMAB = 1519 130 OPTION_TIMAB = 1 131 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 132 133 134 !-------------------------------------------------------------------------------- 135 ! Implement Gram-Schmidt. 136 !-------------------------------------------------------------------------------- 137 call extract_QR_Householder(mpi_communicator,Hsize,Xsize,Xmatrix,Rmatrix) 138 139 OPTION_TIMAB = 2 140 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 141 142 end subroutine extract_QR
m_hamiltonian/extract_QR_Householder [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
extract_QR_Householder
FUNCTION
.
INPUTS
OUTPUT
PARENTS
gwls_QR_factorization
CHILDREN
xmpi_allgather,xmpi_sum
SOURCE
425 subroutine extract_QR_Householder(mpi_communicator,Hsize,Xsize,Xmatrix,Rmatrix) 426 !-------------------------------------------------------------------------- 427 ! This function computes the QR factorization: 428 ! 429 ! X = Q . R 430 ! 431 ! in order to extract the matrix of orthonormal vectors Q and 432 ! the R matrix. 433 ! 434 ! On output, the matrix X is replaced by Q. 435 ! 436 ! This routine uses Householder operations to generate Q and R. 437 ! Special attention is given to the fact that the matrix may be 438 ! distributed across processors and MPI communication is necessary. 439 ! 440 !-------------------------------------------------------------------------- 441 442 !This section has been created automatically by the script Abilint (TD). 443 !Do not modify the following lines by hand. 444 #undef ABI_FUNC 445 #define ABI_FUNC 'extract_QR_Householder' 446 !End of the abilint section 447 448 implicit none 449 450 integer, intent(in) :: Hsize, Xsize, mpi_communicator 451 complex(dpc),intent(inout) :: Xmatrix(Hsize,Xsize) 452 453 complex(dpc), intent(out),optional :: Rmatrix(Xsize,Xsize) 454 455 ! local variables 456 integer :: numbrer_of_plane_waves 457 458 integer :: io_unit 459 integer :: ierr 460 character(50) :: filename 461 logical :: file_exists 462 463 integer,save :: counter = 0 464 465 integer :: i, j, l_local 466 integer :: l1, l2 467 468 integer, allocatable :: nproc_array(:) 469 470 complex(dpc), allocatable :: Qinternal(:,:) 471 complex(dpc), allocatable :: Rinternal(:,:) 472 complex(dpc), allocatable :: vj(:) 473 474 complex(dpc), allocatable :: A_matrix(:,:) 475 complex(dpc), allocatable :: V_matrix(:,:) 476 477 complex(dpc), allocatable :: list_beta(:) 478 479 complex(dpc) :: cmplx_value 480 real (dp ) :: real_value 481 482 complex(dpc) :: norm_x 483 complex(dpc) :: phase 484 485 complex(dpc), allocatable :: error(:,:) 486 complex(dpc), allocatable :: coeff(:) 487 488 489 integer :: mpi_rank 490 integer :: mpi_nproc 491 logical :: head_node 492 493 ! ************************************************************************* 494 495 496 !-------------------------------------------------------------------------------- 497 ! Implement Householder algorithm, in parallel 498 !-------------------------------------------------------------------------------- 499 mpi_nproc = xmpi_comm_size(mpi_communicator) 500 501 ! extract the rank of this processor in the communicator 502 mpi_rank = xmpi_comm_rank(mpi_communicator) 503 504 ! only head node will write! 505 head_node = mpi_rank == 0 506 507 508 !-------------------------------------------------------------------------------- 509 ! Open a log file for the output of extract_QR 510 !-------------------------------------------------------------------------------- 511 if (debug .and. head_node ) then 512 513 io_unit = get_unit() 514 write(filename,'(A,I0.4,A)') "extract_QR_",mpi_rank,".log" 515 inquire(file=trim(filename),exist=file_exists) 516 517 if (file_exists) then 518 open(io_unit,file=trim(filename),position='append',status=files_status_old) 519 else 520 open(io_unit,file=trim(filename),status=files_status_new) 521 write(io_unit,10) "#=======================================================================================" 522 write(io_unit,10) "# " 523 write(io_unit,10) "# This file contains information regarding the QR factorization, from extract_QR " 524 write(io_unit,25) "# The algorithm is running in MPI parallel with ",mpi_nproc," processors" 525 write(io_unit,10) "# " 526 write(io_unit,10) "#=======================================================================================" 527 end if 528 529 counter = counter + 1 530 531 write(io_unit,10) "# " 532 write(io_unit,11) "# Call # ", counter 533 write(io_unit,10) "# " 534 write(io_unit,11) "# Hsize = ",Hsize 535 write(io_unit,11) "# Xsize = ",Xsize 536 write(io_unit,13) "# Rmatrix present? = ",present(Rmatrix) 537 538 539 end if 540 541 !-------------------------------------------------------------------------------- 542 ! Get the number of plane waves on every processor 543 !-------------------------------------------------------------------------------- 544 ABI_ALLOCATE(nproc_array,(mpi_nproc)) 545 546 nproc_array = 0 547 548 numbrer_of_plane_waves = Hsize ! do this to avoid "intent" problems 549 call xmpi_allgather(numbrer_of_plane_waves, nproc_array, mpi_communicator, ierr) 550 551 552 !-------------------------------------------------------------------------------- 553 ! Get the offset for each processor 554 ! 555 ! The global index is then given by 556 ! I_{global} = nproc_array(1+rank)+i_{local} 557 ! 558 ! similarly, the local index is given by 559 ! i_{local} = I_{global} - nproc_array(1+rank) 560 ! which is only meaningful if 1 <= i_{local} <= Hsize 561 !-------------------------------------------------------------------------------- 562 563 do j = mpi_nproc, 1, -1 564 nproc_array(j) = 0 565 do i = 1, j-1 566 nproc_array(j) = nproc_array(j)+nproc_array(i) 567 end do 568 end do 569 570 571 !-------------------------------------------------------------------------------- 572 ! Act on the A matrix, following the book by Golub (more or less ;) ) 573 ! 574 !-------------------------------------------------------------------------------- 575 576 ABI_ALLOCATE(A_matrix, (Hsize,Xsize)) 577 ABI_ALLOCATE(V_matrix, (Hsize,Xsize)) 578 ABI_ALLOCATE(list_beta, (Xsize)) 579 ABI_ALLOCATE(coeff , (Xsize)) 580 581 A_matrix(:,:) = Xmatrix(:,:) 582 V_matrix(:,:) = cmplx_0 583 list_beta(:) = cmplx_0 584 585 586 ABI_ALLOCATE(vj, (Hsize)) 587 588 do j = 1, Xsize 589 590 ! Store xj in vj, for now 591 vj(:) = A_matrix(:,j) 592 593 594 if (j > 1) then 595 !------------------------------------------ 596 ! set the array to zero all the way to j-1 597 !------------------------------------------ 598 l_local = j-1-nproc_array(1+mpi_rank) 599 600 if ( l_local > Hsize) then 601 vj(:) = cmplx_0 602 else if ( l_local <= Hsize .and. l_local >= 1) then 603 vj(1:l_local) = cmplx_0 604 end if 605 606 end if 607 608 ! compute the norm of x 609 norm_x = sum(conjg(vj(:))*vj(:)) 610 call xmpi_sum(norm_x,mpi_communicator,ierr) ! sum on all processors in communicator 611 norm_x = sqrt(norm_x) 612 613 614 if (abs(norm_x) > tol14) then 615 ! if |x| ~ 0, there is nothing to do! the column in A is full of zeros. 616 617 ! find the j^th component of x 618 l_local = j-nproc_array(1+mpi_rank) 619 620 ! update vj, on the right processor! 621 if ( l_local <= Hsize .and. l_local >= 1) then 622 623 phase = vj(l_local) 624 625 if (abs(phase) < tol14) then 626 phase = cmplx_1 627 else 628 phase = phase/abs(phase) 629 end if 630 631 vj(l_local) = vj(l_local) + phase*norm_x 632 633 end if 634 635 !compute beta 636 cmplx_value = sum(conjg(vj(:))*vj(:)) 637 call xmpi_sum(cmplx_value,mpi_communicator,ierr) ! sum on all processors 638 639 list_beta(j) = 2.0_dp/cmplx_value 640 641 ! store v for later use; this is less efficient than storing it in the null part of A, 642 ! but it is less of a pain to implement. Feel free to clean this up. 643 V_matrix(:,j) = vj(:) 644 645 646 ! Update the A matrix 647 648 ! Compute v^dagger . A 649 !call ZGEMM( 'C', & ! Hermitian conjugate the first array 650 ! 'N', & ! Leave second array as is 651 ! 1, & ! the number of rows of the matrix op( A ) 652 ! Xsize-j+1, & ! the number of columns of the matrix op( B ) 653 ! Hsize, & ! the number of columns of the matrix op( A ) == rows of matrix op( B ) 654 ! cmplx_1, & ! alpha constant 655 ! vj, & ! matrix A 656 ! Hsize, & ! LDA 657 ! A_matrix(:,j:Xsize), & ! matrix B 658 ! Hsize, & ! LDB 659 ! cmplx_0, & ! beta constant 660 ! coeff(:,j:Xsize), & ! matrix C 661 ! 1) ! LDC 662 do i = j, Xsize 663 coeff(i) = sum(conjg(vj)*A_matrix(:,i)) 664 end do 665 call xmpi_sum(coeff,mpi_communicator,ierr) ! sum on all processors in the communicator 666 667 ! update A 668 do i = j, Xsize 669 A_matrix(:,i) = A_matrix(:,i) - list_beta(j)*coeff(i)*vj(:) 670 end do 671 672 end if 673 674 end do 675 676 !-------------------------------------------------------------------------------- 677 ! Extract the R matrix 678 ! 679 !-------------------------------------------------------------------------------- 680 681 ABI_ALLOCATE(Rinternal,(Xsize,Xsize)) 682 683 Rinternal = cmplx_0 684 685 686 do j = 1, Xsize 687 do i = 1, j 688 l_local = i-nproc_array(1+mpi_rank) 689 690 if ( l_local <= Hsize .and. l_local >= 1) then 691 Rinternal(i,j) = A_matrix(l_local,j) 692 end if 693 694 end do ! i 695 end do ! j 696 697 call xmpi_sum(Rinternal,mpi_communicator,ierr) ! sum on all processors 698 699 700 701 !-------------------------------------------------------------------------------- 702 ! Extract the Q matrix 703 ! 704 !-------------------------------------------------------------------------------- 705 706 ABI_ALLOCATE( Qinternal, (Hsize,Xsize)) 707 708 ! initialize Q to the identity in the top corner 709 Qinternal = cmplx_0 710 711 do j = 1, Xsize 712 l_local = j-nproc_array(1+mpi_rank) 713 714 if ( l_local <= Hsize .and. l_local >= 1 ) then 715 Qinternal(l_local,j) = cmplx_1 716 end if 717 718 end do ! j 719 720 721 ! Build Q interatively 722 do j = Xsize,1, -1 723 724 vj(:) = V_matrix(:,j) 725 726 727 ! Update the A matrix 728 729 ! Compute v^dagger . A 730 !call ZGEMM( 'C', & ! Hermitian conjugate the first array 731 ! 'N', & ! Leave second array as is 732 ! 1, & ! the number of rows of the matrix op( A ) 733 ! Xsize, & ! the number of columns of the matrix op( B ) 734 ! Hsize, & ! the number of columns of the matrix op( A ) == rows of matrix op( B ) 735 ! cmplx_1, & ! alpha constant 736 ! vj, & ! matrix A 737 ! Hsize, & ! LDA 738 ! Qinternal, & ! matrix B 739 ! Hsize, & ! LDB 740 ! cmplx_0, & ! beta constant 741 ! coeff, & ! matrix C 742 ! 1) ! LDC 743 744 do i = 1, Xsize 745 coeff(i) = sum(conjg(vj)*Qinternal(:,i)) 746 end do 747 call xmpi_sum(coeff,mpi_communicator,ierr) ! sum on all processors in communicator 748 749 750 ! update Q 751 do i = 1, Xsize 752 Qinternal(:,i) = Qinternal(:,i) - list_beta(j)*coeff(i)*vj(:) 753 end do 754 755 end do ! j 756 757 758 759 ! clean up 760 ABI_DEALLOCATE(V_matrix) 761 ABI_DEALLOCATE(coeff) 762 ABI_DEALLOCATE(vj) 763 764 !-------------------------------------------------------------------------------- 765 ! Do some debug, if requested 766 ! 767 !-------------------------------------------------------------------------------- 768 769 if (debug ) then 770 771 if ( head_node ) then 772 773 write(io_unit,20) "# nproc_array = ",nproc_array 774 flush(io_unit) 775 776 write(io_unit,40) "# list_beta = ",real(list_beta) 777 flush(io_unit) 778 end if 779 780 781 ABI_ALLOCATE(error,(Xsize,Xsize)) 782 783 error = cmplx_0 784 785 do l2=1,Xsize 786 error(l2,l2) = error(l2,l2) - cmplx_1 787 do l1=1,Xsize 788 789 cmplx_value = complex_vector_product(Qinternal(:,l1),Qinternal(:,l2),Hsize) 790 call xmpi_sum(cmplx_value,mpi_communicator,ierr) ! sum on all processors working on FFT! 791 792 error(l1,l2) = error(l1,l2)+cmplx_value 793 794 end do 795 end do 796 797 if ( head_node ) then 798 write(io_unit,12) "# || Q^t.Q - I || = ",sqrt(sum(abs(error(:,:))**2)) 799 flush(io_unit) 800 end if 801 802 803 ABI_DEALLOCATE(error) 804 805 ABI_ALLOCATE(error,(Hsize,Xsize)) 806 807 error = Xmatrix 808 809 do l2=1,Xsize 810 do l1=1,Xsize 811 error(:,l2) = error(:,l2) - Qinternal(:,l1)*Rinternal(l1,l2) 812 end do 813 end do 814 815 real_value = zero 816 do l2=1,Xsize 817 do l1=1,Xsize 818 cmplx_value = complex_vector_product(error(:,l1),error(:,l2),Hsize) 819 820 real_value = real_value + abs(cmplx_value)**2 821 end do 822 end do 823 824 call xmpi_sum(real_value,mpi_communicator,ierr) ! sum on all processors 825 826 827 real_value = sqrt(real_value) 828 829 if ( head_node) then 830 write(io_unit,12) "# || Xin - Q.R || = ",real_value 831 832 if ( real_value > 1.0D-10 ) write(io_unit,10) "# ERROR! " 833 834 write(io_unit,10) '#' 835 write(io_unit,10) '# R matrix' 836 write(io_unit,10) '#' 837 do l1=1, Xsize 838 write(io_unit,30) Rinternal(l1,:) 839 end do 840 write(io_unit,10) '' 841 write(io_unit,10) '' 842 843 844 write(io_unit,10) '#' 845 write(io_unit,10) '# top of A matrix' 846 write(io_unit,10) '#' 847 do l1=1, 2*Xsize 848 write(io_unit,30) A_matrix(l1,:) 849 end do 850 write(io_unit,10) '' 851 write(io_unit,10) '' 852 853 close(io_unit) 854 855 end if 856 857 ABI_DEALLOCATE(error) 858 859 end if 860 861 !-------------------------------------------------------------------------------- 862 ! Final assignment 863 ! 864 !-------------------------------------------------------------------------------- 865 866 Xmatrix = Qinternal 867 868 if (present(Rmatrix)) then 869 Rmatrix = Rinternal 870 end if 871 872 ABI_DEALLOCATE(Qinternal) 873 ABI_DEALLOCATE(Rinternal) 874 ABI_DEALLOCATE(nproc_array) 875 ABI_DEALLOCATE(A_matrix) 876 ABI_DEALLOCATE(list_beta) 877 878 879 10 format(A) 880 11 format(A,I8) 881 12 format(A,E24.16) 882 13 format(A,2X,L10) 883 20 format(A,1000I10) 884 25 format(A,I5,A) 885 30 format(1000(F22.16,2X,F22.16,5X)) 886 40 format(A,1000(F22.16,5X)) 887 888 end subroutine extract_QR_Householder
m_hamiltonian/extract_SVD [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
extract_SVD
FUNCTION
.
INPUTS
OUTPUT
PARENTS
gwls_DielectricArray
CHILDREN
xmpi_allgather,xmpi_sum
SOURCE
165 subroutine extract_SVD(mpi_communicator, Hsize,lsolutions_max,svd_matrix,svd_values) 166 !-------------------------------------------------------------------------- 167 ! This function computes the singular value decomposition 168 ! 169 ! X = U . SIGMA . V^dagger 170 ! 171 ! More specifically, the matrix U of orthonormal vectors and SIGMA 172 ! the eigenvalues are returned. 173 ! 174 ! different algorithms are used, depending on parallelisation scheme. 175 !-------------------------------------------------------------------------- 176 177 !This section has been created automatically by the script Abilint (TD). 178 !Do not modify the following lines by hand. 179 #undef ABI_FUNC 180 #define ABI_FUNC 'extract_SVD' 181 use interfaces_18_timing 182 !End of the abilint section 183 184 implicit none 185 186 integer, intent(in) :: mpi_communicator 187 integer, intent(in) :: Hsize, lsolutions_max 188 complex(dpc), intent(inout) :: svd_matrix(Hsize,lsolutions_max) 189 real(dp), intent(out) :: svd_values(lsolutions_max) 190 191 192 complex(dpc), allocatable :: Rmatrix(:,:) 193 complex(dpc), allocatable :: svd_tmp(:,:) 194 195 real(dp) :: tsec(2) 196 integer :: GWLS_TIMAB, OPTION_TIMAB 197 198 ! ************************************************************************* 199 200 201 202 GWLS_TIMAB = 1520 203 OPTION_TIMAB = 1 204 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 205 206 207 !if ( mpi_enreg%nproc_fft ==1 ) then 208 if ( .false. ) then 209 210 call extract_SVD_lapack(Hsize,lsolutions_max,svd_matrix,svd_values) 211 212 else 213 214 ABI_ALLOCATE(Rmatrix,(lsolutions_max,lsolutions_max)) 215 216 ! perform QR first 217 call extract_QR(mpi_communicator, Hsize,lsolutions_max,svd_matrix,Rmatrix) 218 219 ! perform SVD on the much smaller Rmatrix! 220 call extract_SVD_lapack(lsolutions_max,lsolutions_max,Rmatrix,svd_values) 221 222 ABI_ALLOCATE(svd_tmp,(Hsize,lsolutions_max)) 223 224 ! Rmatrix is overwritten with U matrix from SVD. Update the svd_matrix 225 call ZGEMM( 'N', & ! Leave first array as is 226 'N', & ! Leave second array as is 227 Hsize, & ! the number of rows of the matrix op( A ) 228 lsolutions_max, & ! the number of columns of the matrix op( B ) 229 lsolutions_max, & ! the number of columns of the matrix op( A ) == rows of matrix op( B ) 230 cmplx_1, & ! alpha constant 231 svd_matrix, & ! matrix A 232 Hsize, & ! LDA 233 Rmatrix, & ! matrix B 234 lsolutions_max, & ! LDB 235 cmplx_0, & ! beta constant 236 svd_tmp, & ! matrix C 237 Hsize) ! LDC 238 239 svd_matrix(:,:) = svd_tmp(:,:) 240 241 ABI_DEALLOCATE(svd_tmp) 242 ABI_DEALLOCATE(Rmatrix) 243 end if 244 OPTION_TIMAB = 2 245 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 246 247 248 end subroutine extract_SVD
m_hamiltonian/extract_SVD_lapack [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
extract_SVD_lapack
FUNCTION
.
INPUTS
OUTPUT
PARENTS
gwls_QR_factorization
CHILDREN
xmpi_allgather,xmpi_sum
SOURCE
270 subroutine extract_SVD_lapack(Hsize,lsolutions_max,svd_matrix,svd_values) 271 !-------------------------------------------------------------------------- 272 ! This function computes the singular value decomposition 273 ! using lapack routines. This is not appropriate in MPI parallel! 274 ! 275 ! 276 !-------------------------------------------------------------------------- 277 278 !This section has been created automatically by the script Abilint (TD). 279 !Do not modify the following lines by hand. 280 #undef ABI_FUNC 281 #define ABI_FUNC 'extract_SVD_lapack' 282 !End of the abilint section 283 284 implicit none 285 286 287 integer, intent(in) :: Hsize, lsolutions_max 288 complex(dpc), intent(inout) :: svd_matrix(Hsize,lsolutions_max) 289 real(dp), intent(out) :: svd_values(lsolutions_max) 290 291 292 293 integer :: info_zgesvd 294 integer :: lwork_svd 295 complex(dpc), allocatable :: work_svd(:) 296 complex(dpc), allocatable :: svd_U(:,:), svd_V(:,:) 297 real (dp ), allocatable :: rwork_svd(:) 298 299 integer :: debug_unit 300 character(50) :: debug_filename 301 302 ! ************************************************************************* 303 304 305 306 307 ! allocate arrays for the svd 308 ABI_ALLOCATE(svd_U ,(1,1)) 309 ABI_ALLOCATE(svd_V ,(1,1)) 310 311 312 ! DIMENSION QUERRY for singluar decomposition problem 313 314 ABI_ALLOCATE(rwork_svd ,(5*min(Hsize,lsolutions_max))) 315 ABI_ALLOCATE(work_svd,(1)) 316 lwork_svd = -1 317 318 call zgesvd('O', & ! The first min(m,n) columns of U (the left singular vectors) are overwritten on the array A; 319 'N', & ! no column vectors of V are computed 320 Hsize, & ! number of rows of the matrix 321 lsolutions_max, & ! number of columns of the matrix 322 svd_matrix, & ! matrix to be decomposed 323 Hsize, & ! LDA 324 svd_values, & ! singular values 325 svd_U, & ! dummy U; not referenced 326 1, & ! size of U 327 svd_V, & ! dummy V; not referenced 328 1, & ! size of V 329 work_svd, & ! work array 330 lwork_svd, & ! size of work array 331 rwork_svd, & ! work array 332 info_zgesvd ) 333 334 if ( info_zgesvd /= 0) then 335 debug_unit = get_unit() 336 write(debug_filename,'(A,I4.4,A)') 'LAPACK_DEBUG_PROC=',mpi_enreg%me,'.log' 337 338 open(debug_unit,file=trim(debug_filename),status='unknown') 339 340 write(debug_unit,'(A)') '*********************************************************************************************' 341 write(debug_unit,'(A,I4,A)') '* ERROR: info = ',info_zgesvd,' in ZGESVD(1), gwls_QR_factorization' 342 write(debug_unit,'(A)') '*********************************************************************************************' 343 344 close(debug_unit) 345 346 end if 347 348 349 350 351 352 353 lwork_svd = nint(dble(work_svd(1))) 354 355 ABI_DEALLOCATE(work_svd) 356 357 ABI_ALLOCATE(work_svd,(lwork_svd)) 358 359 ! computation run 360 361 call zgesvd('O', & ! The first min(m,n) columns of U (the left singular vectors) are overwritten on the array A; 362 'N', & ! no column vectors of V are computed 363 Hsize, & ! number of rows of the matrix 364 lsolutions_max, & ! number of columns of the matrix 365 svd_matrix, & ! matrix to be decomposed 366 Hsize, & ! LDA 367 svd_values, & ! singular values 368 svd_U, & ! dummy U; not referenced 369 1, & ! size of U 370 svd_V, & ! dummy V; not referenced 371 1, & ! size of V 372 work_svd, & ! work array 373 lwork_svd, & ! size of work array 374 rwork_svd, & ! work array 375 info_zgesvd ) 376 377 if ( info_zgesvd /= 0) then 378 debug_unit = get_unit() 379 write(debug_filename,'(A,I4.4,A)') 'LAPACK_DEBUG_PROC=',mpi_enreg%me,'.log' 380 381 open(debug_unit,file=trim(debug_filename),status='unknown') 382 383 write(debug_unit,'(A)') '*********************************************************************************************' 384 write(debug_unit,'(A,I4,A)') '* ERROR: info = ',info_zgesvd,' in ZGESVD(2), gwls_QR_factorization' 385 write(debug_unit,'(A)') '*********************************************************************************************' 386 387 close(debug_unit) 388 389 end if 390 391 392 393 394 ABI_DEALLOCATE(work_svd) 395 ABI_DEALLOCATE(rwork_svd) 396 ABI_DEALLOCATE(svd_U) 397 ABI_DEALLOCATE(svd_V) 398 399 end subroutine extract_SVD_lapack