TABLE OF CONTENTS
- ABINIT/gwls_DielectricArray
- m_hamiltonian/cleanup_projected_Sternheimer_epsilon
- m_hamiltonian/compute_eps_m1_minus_eps_model_m1
- m_hamiltonian/compute_eps_m1_minus_one
- m_hamiltonian/compute_eps_model_m1_minus_one
- m_hamiltonian/generate_frequencies_and_weights
- m_hamiltonian/ProjectedSternheimerEpsilon
ABINIT/gwls_DielectricArray [ Modules ]
NAME
gwls_DielectricArray
FUNCTION
.
COPYRIGHT
Copyright (C) 2009-2018 ABINIT group (JLJ, BR, MC) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
PARENTS
CHILDREN
SOURCE
21 #if defined HAVE_CONFIG_H 22 #include "config.h" 23 #endif 24 25 #include "abi_common.h" 26 27 28 module gwls_DielectricArray 29 !---------------------------------------------------------------------------------------------------- 30 ! This module generates and stores the arrays 31 ! 32 ! { eps^{-1}(iw)-eps_model^{-1}(iw) } in Lanczos basis 33 ! { eps_model^{-1}(iw) - 1 } in model Lanczos basis 34 ! 35 ! It makes sense to build these only once, as they do not depend on the external frequency. 36 ! 37 !---------------------------------------------------------------------------------------------------- 38 39 ! local modules 40 use m_gwls_utility 41 use gwls_wf 42 use gwls_valenceWavefunctions 43 use gwls_hamiltonian 44 use gwls_lineqsolver 45 use gwls_polarisability 46 use gwls_model_polarisability 47 use gwls_GenerateEpsilon 48 use gwls_TimingLog 49 use gwls_QR_factorization 50 use gwls_LanczosBasis 51 52 ! abinit modules 53 use defs_basis 54 use m_profiling_abi 55 use m_xmpi 56 use m_cgtools 57 58 use m_io_tools, only: get_unit 59 use m_gaussian_quadrature, only: get_frequencies_and_weights_legendre 60 61 62 implicit none 63 save 64 65 private
m_hamiltonian/cleanup_projected_Sternheimer_epsilon [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
cleanup_projected_Sternheimer_epsilon
FUNCTION
.
INPUTS
OUTPUT
PARENTS
gwls_ComputeCorrelationEnergy
CHILDREN
cpu_time,extract_svd,g_to_r,gr_to_g,hpsik,pc_k_valence_kernel setup_pk_model,sqmr,sqrt_vc_k,wf_block_distribute write_text_block_in_timing_log,write_timing_log,xmpi_sum,zgemm,zgesv
SOURCE
1036 subroutine cleanup_projected_Sternheimer_epsilon 1037 1038 1039 !This section has been created automatically by the script Abilint (TD). 1040 !Do not modify the following lines by hand. 1041 #undef ABI_FUNC 1042 #define ABI_FUNC 'cleanup_projected_Sternheimer_epsilon' 1043 !End of the abilint section 1044 1045 implicit none 1046 1047 ! ************************************************************************* 1048 1049 if(allocated(projected_epsilon_M_matrix)) then 1050 ABI_DEALLOCATE(projected_epsilon_M_matrix) 1051 end if 1052 if(allocated(projected_epsilon_B_matrix)) then 1053 ABI_DEALLOCATE(projected_epsilon_B_matrix) 1054 end if 1055 if(allocated(projected_epsilon_G_matrix)) then 1056 ABI_DEALLOCATE(projected_epsilon_G_matrix) 1057 end if 1058 if(allocated(eps_m1_minus_eps_model_m1)) then 1059 ABI_DEALLOCATE(eps_m1_minus_eps_model_m1) 1060 end if 1061 if(allocated(list_omega)) then 1062 ABI_DEALLOCATE(list_omega) 1063 end if 1064 if(allocated(list_weights)) then 1065 ABI_DEALLOCATE(list_weights) 1066 end if 1067 if(allocated(eps_model_m1_minus_one_DISTR)) then 1068 ABI_DEALLOCATE(eps_model_m1_minus_one_DISTR) 1069 end if 1070 if(allocated(model_lanczos_vector_belongs_to_this_node)) then 1071 ABI_DEALLOCATE(model_lanczos_vector_belongs_to_this_node) 1072 end if 1073 if(allocated(model_lanczos_vector_index)) then 1074 ABI_DEALLOCATE(model_lanczos_vector_index) 1075 end if 1076 1077 1078 end subroutine cleanup_projected_Sternheimer_epsilon
m_hamiltonian/compute_eps_m1_minus_eps_model_m1 [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
compute_eps_m1_minus_eps_model_m1
FUNCTION
.
INPUTS
OUTPUT
PARENTS
gwls_ComputeCorrelationEnergy
CHILDREN
cpu_time,extract_svd,g_to_r,gr_to_g,hpsik,pc_k_valence_kernel setup_pk_model,sqmr,sqrt_vc_k,wf_block_distribute write_text_block_in_timing_log,write_timing_log,xmpi_sum,zgemm,zgesv
SOURCE
205 subroutine compute_eps_m1_minus_eps_model_m1(lmax, npt_gauss) 206 !---------------------------------------------------------------------------------------------------- 207 ! 208 ! This subroutine computes the array 209 ! 210 ! eps^{-1}(iw) - eps_model^{-1}(iw), 211 ! 212 ! for all relevant frequencies in the Lanczos basis. 213 !---------------------------------------------------------------------------------------------------- 214 215 !This section has been created automatically by the script Abilint (TD). 216 !Do not modify the following lines by hand. 217 #undef ABI_FUNC 218 #define ABI_FUNC 'compute_eps_m1_minus_eps_model_m1' 219 !End of the abilint section 220 221 implicit none 222 223 integer , intent(in) :: lmax, npt_gauss 224 225 character(256) :: timing_string 226 real(dp) :: time1, time2 227 real(dp) :: time 228 229 integer :: iw, l 230 complex(dpc),allocatable :: dummy_matrix(:,:) 231 complex(dpc),allocatable :: iden(:,:) 232 233 ! ************************************************************************* 234 235 236 timing_string = "#" 237 call write_text_block_in_Timing_log(timing_string) 238 timing_string = "# Computing eps^{-1}(iw) - eps_model^{-1}(iw) " 239 call write_text_block_in_Timing_log(timing_string) 240 timing_string = "#" 241 call write_text_block_in_Timing_log(timing_string) 242 243 244 call cpu_time(time1) 245 ! Allocate the module array 246 ABI_ALLOCATE(eps_m1_minus_eps_model_m1, (lmax,lmax,npt_gauss+1)) 247 ABI_ALLOCATE(dummy_matrix, (lmax,lmax)) 248 ABI_ALLOCATE(iden, (lmax,lmax)) 249 250 251 iden = cmplx_0 252 253 do l = 1, lmax 254 iden(l,l) = cmplx_1 255 end do 256 257 do iw = 1, npt_gauss + 1 258 259 260 dummy_matrix(:,:) = projected_dielectric_Lanczos_basis(:,:,iw) 261 262 call driver_invert_positive_definite_hermitian_matrix(dummy_matrix,lmax) 263 264 265 eps_m1_minus_eps_model_m1(:,:,iw) = dummy_matrix(:,:) 266 267 dummy_matrix(:,:) = model_dielectric_Lanczos_basis(:,:,iw) 268 269 270 271 call driver_invert_positive_definite_hermitian_matrix(dummy_matrix,lmax) 272 273 eps_m1_minus_eps_model_m1(:,:,iw) = eps_m1_minus_eps_model_m1(:,:,iw) - dummy_matrix(:,:) 274 275 276 end do 277 278 279 280 281 ! Deallocate the arrays which are no longer needed 282 ABI_DEALLOCATE(model_dielectric_Lanczos_basis) 283 ABI_DEALLOCATE(projected_dielectric_Lanczos_basis) 284 285 286 287 ABI_DEALLOCATE(dummy_matrix) 288 ABI_DEALLOCATE(iden) 289 290 291 292 call cpu_time(time2) 293 time = time2-time1 294 295 timing_string = "# Total time : " 296 call write_timing_log(timing_string,time) 297 298 299 300 301 end subroutine compute_eps_m1_minus_eps_model_m1
m_hamiltonian/compute_eps_m1_minus_one [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
compute_eps_m1_minus_one
FUNCTION
.
INPUTS
OUTPUT
PARENTS
gwls_ComputeCorrelationEnergy
CHILDREN
cpu_time,extract_svd,g_to_r,gr_to_g,hpsik,pc_k_valence_kernel setup_pk_model,sqmr,sqrt_vc_k,wf_block_distribute write_text_block_in_timing_log,write_timing_log,xmpi_sum,zgemm,zgesv
SOURCE
325 subroutine compute_eps_m1_minus_one(lmax, npt_gauss) 326 !---------------------------------------------------------------------------------------------------- 327 ! 328 ! This subroutine computes the array 329 ! 330 ! eps^{-1}(iw) - I 331 ! 332 ! for all relevant frequencies in the Lanczos basis. 333 !---------------------------------------------------------------------------------------------------- 334 335 !This section has been created automatically by the script Abilint (TD). 336 !Do not modify the following lines by hand. 337 #undef ABI_FUNC 338 #define ABI_FUNC 'compute_eps_m1_minus_one' 339 !End of the abilint section 340 341 implicit none 342 343 integer , intent(in) :: lmax, npt_gauss 344 345 character(256) :: timing_string 346 real(dp) :: time1, time2 347 real(dp) :: time 348 349 integer :: iw, l 350 complex(dpc),allocatable :: dummy_matrix(:,:) 351 complex(dpc),allocatable :: iden(:,:) 352 353 ! ************************************************************************* 354 355 356 357 timing_string = "#" 358 call write_text_block_in_Timing_log(timing_string) 359 timing_string = "# Computing eps^{-1}(iw) - I " 360 call write_text_block_in_Timing_log(timing_string) 361 timing_string = "#" 362 call write_text_block_in_Timing_log(timing_string) 363 364 call cpu_time(time1) 365 ! Allocate the module array 366 367 ! The array eps_m1_minus_eps_model_m1 will be used to store 368 ! eps^{-1}-1; we can think of eps_model = I in this case. 369 ABI_ALLOCATE(eps_m1_minus_eps_model_m1, (lmax,lmax,npt_gauss+1)) 370 371 ABI_ALLOCATE(dummy_matrix, (lmax,lmax)) 372 ABI_ALLOCATE(iden, (lmax,lmax)) 373 374 iden = cmplx_0 375 376 do l = 1, lmax 377 iden(l,l) = cmplx_1 378 end do 379 380 381 do iw = 1, npt_gauss + 1 382 383 dummy_matrix(:,:) = projected_dielectric_Lanczos_basis(:,:,iw) 384 385 call driver_invert_positive_definite_hermitian_matrix(dummy_matrix,lmax) 386 387 eps_m1_minus_eps_model_m1(:,:,iw) = dummy_matrix(:,:)-iden(:,:) 388 389 end do 390 391 392 ! Deallocate the arrays which are no longer needed 393 ABI_DEALLOCATE(projected_dielectric_Lanczos_basis) 394 395 ABI_DEALLOCATE(dummy_matrix) 396 ABI_DEALLOCATE(iden) 397 398 call cpu_time(time2) 399 time = time2-time1 400 401 timing_string = "# Total time : " 402 call write_timing_log(timing_string,time) 403 404 end subroutine compute_eps_m1_minus_one
m_hamiltonian/compute_eps_model_m1_minus_one [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
compute_eps_model_m1_minus_one
FUNCTION
.
INPUTS
OUTPUT
PARENTS
gwls_ComputeCorrelationEnergy
CHILDREN
cpu_time,extract_svd,g_to_r,gr_to_g,hpsik,pc_k_valence_kernel setup_pk_model,sqmr,sqrt_vc_k,wf_block_distribute write_text_block_in_timing_log,write_timing_log,xmpi_sum,zgemm,zgesv
SOURCE
428 subroutine compute_eps_model_m1_minus_one(lmax_model, npt_gauss, second_model_parameter, epsilon_model_eigenvalues_0) 429 !---------------------------------------------------------------------------------------------------- 430 ! 431 ! This subroutine computes the array 432 ! 433 ! eps_model^{-1}(iw) - 1 434 ! 435 ! for all relevant frequencies in the model Lanczos basis. 436 ! 437 ! This array can potentially get very large, as the complementary basis gets big to achieve 438 ! convergence. Correspondingly, it makes sense to DISTRIBUTE this array on all processors. 439 ! 440 ! The algorithm will go as follows: 441 ! 442 ! eps_m = 1 - V . P . V, V = sqrt{vc} 443 ! 444 ! I ) compute VPV, storing blocks on different processors: 445 ! 446 ! VPV = [ ------- -------- ] = VPV[lc, nB, nW] 447 ! | [| block | block | ] 448 ! lc [| 1 | 2 | ... ] 449 ! | [| | | ] 450 ! | [| | | ] 451 ! | [ ------- --------- ] 452 ! bsize 453 ! 454 ! This construction *does* involve a fair bit of communications, but it takes 455 ! a lot less RAM! 456 ! 457 ! II) once VPV is constructed, do, one frequency at a time: 458 ! - import all blocks to the HEAD processor 459 ! - compute eps_m = 1- VPV 460 ! - invert eps_m^{-1} 461 ! - subtract identity eps_m^{-1} - 1 462 ! - redistribute, block by block 463 ! 464 ! Doing this frequency by frequency will reduce the RAM weight on the head node. 465 ! 466 ! 467 !---------------------------------------------------------------------------------------------------- 468 469 !This section has been created automatically by the script Abilint (TD). 470 !Do not modify the following lines by hand. 471 #undef ABI_FUNC 472 #define ABI_FUNC 'compute_eps_model_m1_minus_one' 473 use interfaces_18_timing 474 !End of the abilint section 475 476 implicit none 477 478 integer, intent(in) :: lmax_model, npt_gauss 479 real(dp), intent(in) :: second_model_parameter 480 real(dp), intent(in) :: epsilon_model_eigenvalues_0(lmax_model) 481 482 integer :: l, l1, l2 483 integer :: iw 484 integer :: v 485 integer :: ierr 486 487 real(dp), allocatable :: psikg_valence(:,:) 488 real(dp), allocatable :: psir_valence(:,:,:,:) 489 490 491 real(dp), allocatable :: psik_wrk(:,:) 492 real(dp), allocatable :: psikb_wrk(:,:) 493 real(dp), allocatable :: psikg_wrk(:,:) 494 real(dp), allocatable :: psikg_tmp(:,:) 495 496 complex(dpc),allocatable :: local_Lbasis_conjugated(:,:) 497 498 499 complex(dpc),allocatable :: VPV(:,:,:) 500 real(dp), allocatable :: re_buffer(:,:), im_buffer(:,:) 501 502 complex(dpc),allocatable :: vpv_row(:) 503 504 505 complex(dpc),allocatable :: epsilon_head(:,:) 506 507 real(dp), allocatable :: re_BUFFER_head(:,:) 508 real(dp), allocatable :: im_BUFFER_head(:,:) 509 510 511 512 complex(dpc),allocatable :: YL(:) 513 514 character(256) :: timing_string 515 real(dp) :: time1, time2, time 516 real(dp) :: fft_time1, fft_time2, fft_time 517 real(dp) :: prod_time1, prod_time2, prod_time 518 519 complex(dpc) :: z 520 521 522 integer :: iblk_lanczos, nbdblock_lanczos 523 integer :: mb 524 integer :: lb 525 integer :: ik 526 527 integer :: mpi_communicator 528 integer :: mpi_nproc 529 integer :: mpi_rank 530 integer :: mpi_head_rank 531 532 533 integer,allocatable :: sendcounts(:), displs(:) 534 535 integer :: sendcount, recvcount 536 537 538 logical :: head 539 540 541 ! timing 542 real(dp) :: tsec(2) 543 integer :: GWLS_TIMAB, OPTION_TIMAB 544 545 ! ************************************************************************* 546 547 548 GWLS_TIMAB = 1541 549 OPTION_TIMAB = 1 550 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 551 552 553 554 timing_string = "#" 555 call write_text_block_in_Timing_log(timing_string) 556 timing_string = "# computing eps_model_m1_minus_one" 557 call write_text_block_in_Timing_log(timing_string) 558 timing_string = "#" 559 call write_text_block_in_Timing_log(timing_string) 560 561 562 ! Number of blocks of lanczos vectors (blocksize = npband) 563 nbdblock_lanczos = lmax_model/blocksize 564 if (modulo(lmax_model,blocksize) /= 0) nbdblock_lanczos = nbdblock_lanczos + 1 565 566 567 ! communicator 568 mpi_communicator = mpi_enreg%comm_bandfft 569 570 ! total number of processors in the communicator 571 mpi_nproc = xmpi_comm_size(mpi_communicator ) 572 573 ! what is the rank of this processor? 574 mpi_rank = xmpi_comm_rank(mpi_communicator ) 575 576 ! rank of the "head" processor 577 mpi_head_rank = 0 578 579 580 ! number of blocks in the model dielectric matrix, which is equal to the number of processors 581 nbdblock_epsilon = mpi_nproc 582 583 584 ! number of vectors in every block 585 blocksize_epsilon = lmax_model/mpi_nproc 586 if (modulo(lmax_model,mpi_nproc) /= 0) blocksize_epsilon = blocksize_epsilon + 1 587 588 ! attribute blocks to every nodes, and tabulate ownership in logical array 589 ! This is not *the most efficient* implementation possible, but it is convenient 590 ABI_ALLOCATE( model_lanczos_vector_belongs_to_this_node, (lmax_model)) 591 ABI_ALLOCATE( model_lanczos_vector_index, (lmax_model)) 592 593 594 model_lanczos_vector_index = 0 595 model_lanczos_vector_belongs_to_this_node = .false. 596 597 do l =1, lmax_model 598 if (mpi_rank == (l-1)/blocksize_epsilon) then 599 model_lanczos_vector_belongs_to_this_node(l) = .true. 600 model_lanczos_vector_index(l) = l-mpi_rank*blocksize_epsilon 601 end if 602 end do 603 604 !write(100+mpi_rank,*) "model_lanczos_vector_belongs_to_this_node = ",model_lanczos_vector_belongs_to_this_node(:) 605 !write(100+mpi_rank,*) "model_lanczos_vector_index = ",model_lanczos_vector_index(:) 606 !flush(100+mpi_rank) 607 608 ! Prepare the array that will contain the matrix elements of the model operator 609 ABI_ALLOCATE(VPV, (lmax_model,blocksize_epsilon,npt_gauss+1)) 610 611 VPV(:,:,:) = cmplx_0 612 613 ABI_ALLOCATE(vpv_row, (lmax_model)) 614 615 616 ! various working arrays 617 GWLS_TIMAB = 1542 618 OPTION_TIMAB = 1 619 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 620 621 ABI_ALLOCATE(psikg_valence,(2,npw_g)) 622 ABI_ALLOCATE(psir_valence ,(2,n4,n5,n6)) 623 624 625 ABI_ALLOCATE(psik_wrk, (2,npw_k)) 626 ABI_ALLOCATE(psikb_wrk, (2,npw_kb)) 627 ABI_ALLOCATE(psikg_wrk, (2,npw_g)) 628 ABI_ALLOCATE(psikg_tmp, (2,npw_g)) 629 630 ABI_ALLOCATE(local_Lbasis_conjugated,(npw_k,lmax_model)) 631 ABI_ALLOCATE(YL,(npw_k)) 632 633 OPTION_TIMAB = 2 634 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 635 636 637 638 fft_time = zero 639 prod_time = zero 640 641 642 call cpu_time(time1) 643 ! loop on all valence bands 644 645 do v = 1, nbandv 646 647 648 ! copy pre-calculated valence state in this covenient local array 649 psikg_valence(:,:) = kernel_wavefunctions_FFT(:,:,v) 650 651 ! compute fourier transform of valence state, and conjugate 652 call g_to_r(psir_valence,psikg_valence) 653 psir_valence(2,:,:,:) = -psir_valence(2,:,:,:) 654 655 !-------------------------------------------------------------------------- 656 ! 657 ! Step 1: build the modified basis Pc . [ (V^{1/2} l) . psi_v^*]. 658 ! 659 !-------------------------------------------------------------------------- 660 GWLS_TIMAB = 1543 661 OPTION_TIMAB = 1 662 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 663 664 ! loop on all blocks of lanczos vectors 665 do iblk_lanczos = 1, nbdblock_lanczos 666 ! loop on all states within this block 667 do mb = 1, blocksize 668 669 ! Determine the index of the Lanczos vector 670 l = (iblk_lanczos-1)*blocksize + mb 671 672 if ( l <= lmax_model) then 673 psik_wrk(1,:) = dble (Lbasis_model_lanczos(:,l)) 674 psik_wrk(2,:) = dimag(Lbasis_model_lanczos(:,l)) 675 else 676 psik_wrk(:,:) = zero 677 end if 678 679 ! apply Coulomb potential 680 call sqrt_vc_k(psik_wrk) 681 682 ! Store in array of blocks of wavefunctions 683 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:) 684 685 end do ! mb 686 687 call cpu_time(fft_time1) 688 689 ! Transform to FFT representation 690 call wf_block_distribute(psikb_wrk, psikg_wrk,1) ! LA -> FFT 691 692 693 ! generate the vector Pc [ (sqrt_V_c.l) psi_v^*] 694 695 ! Compute the real space product, and return to k space, in FFT configuration 696 call gr_to_g(psikg_tmp,psir_valence, psikg_wrk) 697 698 699 call cpu_time(fft_time2) 700 fft_time = fft_time + fft_time2-fft_time1 701 702 ! project 703 call pc_k_valence_kernel(psikg_tmp) 704 705 ! Return to LA configuration 706 707 ! Transform to FFT representation 708 call wf_block_distribute(psikb_wrk, psikg_tmp, 2) ! FFT -> LA 709 710 do mb = 1, blocksize 711 712 ! Determine the index of the Lanczos vector 713 l = (iblk_lanczos-1)*blocksize + mb 714 715 716 if ( l <= lmax_model) then 717 psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 718 local_Lbasis_conjugated(:,l) = cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:) 719 end if 720 721 722 end do ! mb 723 724 end do !iblk_lanczos 725 OPTION_TIMAB = 2 726 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 727 728 729 !-------------------------------------------------------------------------- 730 ! Step 2: Now that we have the modified basis, compute the matrix 731 ! elements of the model dielectric operator 732 ! 733 ! 734 !-------------------------------------------------------------------------- 735 GWLS_TIMAB = 1544 736 OPTION_TIMAB = 1 737 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 738 739 do iw = 2, npt_gauss+1 740 741 call setup_Pk_model(list_omega(iw),second_model_parameter) 742 743 call cpu_time(prod_time1) 744 do l1 = 1, lmax_model 745 746 ! Apply core function Y to left-vector; conjugate 747 do ik = 1, npw_k 748 YL(ik) = model_Y_LA(ik)*conjg(local_Lbasis_conjugated(ik,l1)) 749 end do 750 751 ! Only compute lower diagonal part of matrix; epsilon is hermitian conjugate! 752 vpv_row = cmplx_0 753 do l2 = 1, l1 754 do ik = 1, npw_k 755 vpv_row(l2) = vpv_row(l2) + YL(ik)*local_Lbasis_conjugated(ik,l2) 756 end do 757 end do 758 759 ! the code below is twice as long! 760 !call ZGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) 761 !call ZGEMV ( 'T', npw_k, lmax_model, cmplx_1, local_Lbasis_conjugated, npw_k, YL, 1, cmplx_0, vpv_row, 1) 762 763 !do l2 = 1, l1 764 ! eps_model_m1_minus_one(l1,l2,iw) = eps_model_m1_minus_one(l1,l2,iw) & 765 ! -complex_vector_product(YL, local_Lbasis_conjugated(:,l2),npw_k) 766 !end do 767 768 ! Sum on all processors, making sure all processors have the total vpv_row 769 call xmpi_sum(vpv_row, mpi_communicator, ierr) ! sum on all processors for LA configuration 770 771 ! Each processor takes its slice! 772 do l2 =1, l1 773 if ( model_lanczos_vector_belongs_to_this_node(l2) ) then 774 lb = model_lanczos_vector_index(l2) 775 VPV(l1,lb,iw) = VPV(l1,lb,iw) + vpv_row(l2) 776 end if 777 end do 778 779 end do 780 call cpu_time(prod_time2) 781 782 prod_time = prod_time + prod_time2-prod_time1 783 784 end do ! iw 785 OPTION_TIMAB = 2 786 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 787 788 end do ! v 789 790 call cpu_time(time2) 791 792 time = time2-time1 793 timing_string = "# computing VPV : " 794 call write_timing_log(timing_string,time) 795 796 timing_string = "# --- of which is FFT transforms : " 797 call write_timing_log(timing_string,fft_time) 798 799 timing_string = "# --- of which is products : " 800 call write_timing_log(timing_string,prod_time) 801 802 803 804 805 806 GWLS_TIMAB = 1542 807 OPTION_TIMAB = 1 808 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 809 ABI_DEALLOCATE(local_Lbasis_conjugated) 810 ABI_DEALLOCATE(YL) 811 ABI_DEALLOCATE(psikg_valence) 812 ABI_DEALLOCATE(psir_valence) 813 ABI_DEALLOCATE(psik_wrk) 814 ABI_DEALLOCATE(psikb_wrk) 815 ABI_DEALLOCATE(psikg_wrk) 816 ABI_DEALLOCATE(psikg_tmp) 817 ABI_DEALLOCATE(vpv_row) 818 819 OPTION_TIMAB = 2 820 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 821 822 823 824 call cpu_time(time1) 825 GWLS_TIMAB = 1547 826 OPTION_TIMAB = 1 827 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 828 829 830 !-------------------------------------------------------------------------------- 831 ! 832 ! 833 ! Gather dielectric matrix on head node, invert, and re-distribute. This 834 ! Saves a lot of RAM, without needing the full machinery of ScaLAPACK. 835 ! 836 !-------------------------------------------------------------------------------- 837 838 839 ABI_ALLOCATE(eps_model_m1_minus_one_DISTR, (lmax_model,blocksize_epsilon,npt_gauss+1)) 840 ABI_ALLOCATE(re_buffer, (lmax_model,blocksize_epsilon)) 841 ABI_ALLOCATE(im_buffer, (lmax_model,blocksize_epsilon)) 842 843 eps_model_m1_minus_one_DISTR(:,:,:) = cmplx_0 844 845 ! Define the head node, which will invert the dielectric matrices 846 head = mpi_rank == mpi_head_rank 847 848 ! Amount of data received and sent 849 sendcount = lmax_model*blocksize_epsilon 850 recvcount = lmax_model*blocksize_epsilon 851 852 ABI_ALLOCATE(sendcounts,(mpi_nproc)) 853 ABI_ALLOCATE(displs ,(mpi_nproc)) 854 sendcounts(:) = sendcount 855 856 do l =1, mpi_nproc 857 displs(l) = (l-1)*sendcount 858 end do 859 860 if (head) then 861 ! build and invert the dielectric array 862 ! careful! lmax_model not necessarily equal to blocksize_epsilon*nbdblock_epsilon 863 ABI_ALLOCATE(re_BUFFER_head, (lmax_model, blocksize_epsilon*nbdblock_epsilon)) 864 ABI_ALLOCATE(im_BUFFER_head, (lmax_model, blocksize_epsilon*nbdblock_epsilon)) 865 ABI_ALLOCATE(epsilon_head, (lmax_model, lmax_model)) 866 else 867 !This looks superfluous and it is on large number of systems, but sending these 868 !unallocated in xmpi_scatterv caused 'cannot allocate memory' cryptic errors on 869 !several parallel test farm computers (cronos_gcc46_paral, petrus_nag, inca_gcc44_sdebug) 870 ABI_ALLOCATE(re_BUFFER_head, (1,1)) 871 ABI_ALLOCATE(im_BUFFER_head, (1,1)) 872 ABI_ALLOCATE(epsilon_head, (1,1)) 873 end if 874 875 ! Do one frequency at a time, to avoid overflowing the RAM 876 do iw = 1, npt_gauss+1 877 ! Gather, except for static case 878 if ( iw /=1 ) then 879 ! Gather VPV on head node, for this frequency 880 call xmpi_gather(dble(VPV(:,:,iw)), sendcount , re_BUFFER_head, recvcount, mpi_head_rank, mpi_communicator,ierr) 881 call xmpi_gather(dimag(VPV(:,:,iw)), sendcount , im_BUFFER_head, recvcount, mpi_head_rank, mpi_communicator,ierr) 882 end if 883 884 if ( head ) then 885 886 ! fill the dielectric matrix 887 888 epsilon_head(:,:) = cmplx_0 889 if (iw ==1) then 890 ! STATIC CASE, diagonal matrix 891 do l= 1, lmax_model 892 epsilon_head(l,l) = cmplx_1/epsilon_model_eigenvalues_0(l)-cmplx_1 893 end do 894 895 else 896 ! DYNAMIC CASE, compute 897 do l1 =1, lmax_model 898 do l2 =1, l1 899 z = -cmplx_1*re_BUFFER_head(l1,l2)-cmplx_i*im_BUFFER_head(l1,l2) 900 epsilon_head(l1,l2) = z 901 epsilon_head(l2,l1) = conjg(z) 902 end do 903 epsilon_head(l1,l1) = epsilon_head(l1,l1) + cmplx_1 904 end do 905 906 ! invert the matrix 907 call driver_invert_positive_definite_hermitian_matrix(epsilon_head,lmax_model) 908 909 ! subtract identity 910 do l =1, lmax_model 911 epsilon_head(l,l) = epsilon_head(l,l) - cmplx_1 912 end do 913 end if 914 915 ! copy in head buffer 916 re_BUFFER_head(:,:) = zero 917 im_BUFFER_head(:,:) = zero 918 do l1 =1, lmax_model 919 do l2 =1, lmax_model 920 z = epsilon_head(l1,l2) 921 re_BUFFER_head(l1,l2) = dble(z) 922 im_BUFFER_head(l1,l2) = dimag(z) 923 end do 924 end do 925 926 end if 927 928 ! Scatter back the data on the head to all processors 929 call xmpi_scatterv(re_BUFFER_head, sendcounts, displs, re_buffer, recvcount, mpi_head_rank, mpi_communicator, ierr) 930 call xmpi_scatterv(im_BUFFER_head, sendcounts, displs, im_buffer, recvcount, mpi_head_rank, mpi_communicator, ierr) 931 932 eps_model_m1_minus_one_DISTR(:,:,iw) = cmplx_1*re_buffer(:,:) + cmplx_i*im_buffer(:,:) 933 934 end do 935 936 ABI_DEALLOCATE(re_BUFFER_head) 937 ABI_DEALLOCATE(im_BUFFER_head) 938 ABI_DEALLOCATE(epsilon_head) 939 940 941 942 !================================================================================ 943 ! 944 ! For debugging purposes, store distributed dielectric matrix back in the 945 ! complete local copies, to insure the rest of the code works. 946 ! 947 !================================================================================ 948 949 if (.false.) then 950 ! Prepare the array that will contain the matrix elements of the model operator 951 ! THIS IS ONLY FOR THE REST OF THE CODE TO WORK; WE WILL REMOVE THIS 952 ! TO SAVE RAM LATER 953 ABI_ALLOCATE(eps_model_m1_minus_one, (lmax_model,lmax_model,npt_gauss+1)) 954 955 ! initialize the array with zeros 956 eps_model_m1_minus_one = cmplx_0 957 958 ! Amount of data received and sent 959 sendcount = lmax_model*blocksize_epsilon 960 recvcount = lmax_model*blocksize_epsilon*nbdblock_epsilon 961 962 963 ABI_ALLOCATE(re_BUFFER_head, (lmax_model,blocksize_epsilon*nbdblock_epsilon)) 964 ABI_ALLOCATE(im_BUFFER_head, (lmax_model,blocksize_epsilon*nbdblock_epsilon)) 965 966 do iw = 1, npt_gauss+1 967 968 re_buffer(:,:) = dble( eps_model_m1_minus_one_DISTR(:,:,iw)) 969 im_buffer(:,:) = dimag(eps_model_m1_minus_one_DISTR(:,:,iw)) 970 971 call xmpi_allgather(re_buffer,sendcount,re_BUFFER_head,mpi_communicator,ierr) 972 call xmpi_allgather(im_buffer,sendcount,im_BUFFER_head,mpi_communicator,ierr) 973 974 do l = 1, lmax_model 975 eps_model_m1_minus_one(:,l,iw) = cmplx_1*re_BUFFER_head(:,l)+ cmplx_i*im_BUFFER_head(:,l) 976 end do 977 end do 978 979 ABI_DEALLOCATE(re_BUFFER_head) 980 ABI_DEALLOCATE(im_BUFFER_head) 981 end if 982 983 call cpu_time(time2) 984 OPTION_TIMAB = 2 985 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 986 987 !================================================================================ 988 ! 989 !================================================================================ 990 991 992 time = time2-time1 993 timing_string = "# inverting / distributing : " 994 995 call write_timing_log(timing_string,time) 996 997 998 999 ABI_DEALLOCATE(re_buffer ) 1000 ABI_DEALLOCATE(im_buffer ) 1001 ABI_DEALLOCATE(sendcounts) 1002 ABI_DEALLOCATE(displs ) 1003 ABI_DEALLOCATE(VPV ) 1004 1005 1006 1007 GWLS_TIMAB = 1541 1008 OPTION_TIMAB = 2 1009 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 1010 1011 1012 end subroutine compute_eps_model_m1_minus_one
m_hamiltonian/generate_frequencies_and_weights [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
generate_frequencies_and_weights
FUNCTION
.
INPUTS
OUTPUT
PARENTS
gwls_ComputeCorrelationEnergy
CHILDREN
cpu_time,extract_svd,g_to_r,gr_to_g,hpsik,pc_k_valence_kernel setup_pk_model,sqmr,sqrt_vc_k,wf_block_distribute write_text_block_in_timing_log,write_timing_log,xmpi_sum,zgemm,zgesv
SOURCE
128 subroutine generate_frequencies_and_weights(npt_gauss) 129 !-------------------------------------------------------------------------------- 130 ! 131 ! This subroutine computes the frequencies and weights necessary for Gauss-Legendre 132 ! quadrature, and stores the results in module arrays. 133 ! 134 !-------------------------------------------------------------------------------- 135 136 !This section has been created automatically by the script Abilint (TD). 137 !Do not modify the following lines by hand. 138 #undef ABI_FUNC 139 #define ABI_FUNC 'generate_frequencies_and_weights' 140 !End of the abilint section 141 142 implicit none 143 144 integer, intent(in) :: npt_gauss 145 146 147 real(dp), allocatable :: list_omega_tmp(:) 148 real(dp), allocatable :: list_weights_tmp(:) 149 150 integer :: i 151 152 ! ************************************************************************* 153 154 ABI_ALLOCATE(list_omega_tmp, (npt_gauss)) 155 ABI_ALLOCATE(list_weights_tmp, (npt_gauss)) 156 157 call get_frequencies_and_weights_legendre(npt_gauss,list_omega_tmp,list_weights_tmp) 158 159 160 ABI_ALLOCATE(list_omega, (npt_gauss+1)) 161 ABI_ALLOCATE(list_weights, (npt_gauss+1)) 162 163 ! make sure the first frequency in zero! 164 list_omega(1) = zero 165 list_weights(1) = zero 166 167 do i = 1,npt_gauss 168 169 ! inverse the order of the frequency points, as they come out 170 ! in reverse order from the generating subroutine 171 list_omega (i+1) = list_omega_tmp (npt_gauss+1-i) 172 list_weights(i+1) = list_weights_tmp(npt_gauss+1-i) 173 174 end do 175 176 177 ABI_DEALLOCATE(list_omega_tmp) 178 ABI_DEALLOCATE(list_weights_tmp) 179 180 181 end subroutine generate_frequencies_and_weights
m_hamiltonian/ProjectedSternheimerEpsilon [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
ProjectedSternheimerEpsilon
FUNCTION
.
INPUTS
OUTPUT
PARENTS
gwls_ComputeCorrelationEnergy
CHILDREN
cpu_time,extract_svd,g_to_r,gr_to_g,hpsik,pc_k_valence_kernel setup_pk_model,sqmr,sqrt_vc_k,wf_block_distribute write_text_block_in_timing_log,write_timing_log,xmpi_sum,zgemm,zgesv
SOURCE
1103 subroutine ProjectedSternheimerEpsilon(lmax, npt_gauss, second_model_parameter, & 1104 list_projection_frequencies,nfrequencies,& 1105 epsilon_eigenvalues_0,debug,use_model) 1106 !---------------------------------------------------------------------------------------------------- 1107 ! This subroutine combines in a single subprogram the jobs of previous routines 1108 ! 1109 ! - setup_projected_Sternheimer_epsilon 1110 ! - compute_projected_Sternheimer_epsilon 1111 ! 1112 ! The purpose of this combination is to avoid independent loops on nbandv, requiring the 1113 ! arrays 1114 ! projected_epsilon_M_matrix 1115 ! projected_epsilon_B_matrix 1116 ! projected_epsilon_G_matrix 1117 ! 1118 ! from scaling like N^3, which grows very large with problem size. 1119 ! 1120 ! Thus, this routine: 1121 ! 1122 ! - Computes the frequency-dependent dielectric matrix in the Lanczos basis, using 1123 ! the projected Sternheimer equations. 1124 ! 1125 ! - Computes the frequency-dependent MODEL dielectric matrix in the complementary Lanczos basis. 1126 ! 1127 ! 1128 ! This routine will be verbose and write log files; indeed, large jobs crash in here, it will 1129 ! be important to know where/why! 1130 ! 1131 ! The subroutine also computes the matrix elements on epsilon_model(iw) in the Lanczos basis; 1132 ! this is done here to avoid preforming direct products with the valence states again later. 1133 !---------------------------------------------------------------------------------------------------- 1134 1135 !This section has been created automatically by the script Abilint (TD). 1136 !Do not modify the following lines by hand. 1137 #undef ABI_FUNC 1138 #define ABI_FUNC 'ProjectedSternheimerEpsilon' 1139 !End of the abilint section 1140 1141 implicit none 1142 1143 real(dp), parameter :: svd_tolerance = 1.0e-16_dp 1144 1145 integer, intent(in) :: lmax, npt_gauss 1146 integer, intent(in) :: nfrequencies 1147 real(dp), intent(in) :: list_projection_frequencies(nfrequencies) 1148 logical, intent(in) :: debug 1149 real(dp), intent(in) :: epsilon_eigenvalues_0(lmax) 1150 1151 logical,optional,intent(in) :: use_model 1152 1153 real(dp), intent(in) :: second_model_parameter 1154 1155 1156 integer :: l, l1, l2 1157 integer :: i, iw, v 1158 integer :: recy_i 1159 integer :: lsolutions_max, lsolutions, ls 1160 integer :: projection 1161 1162 complex(dpc), allocatable :: sternheimer_A0(:,:) 1163 complex(dpc), allocatable :: sternheimer_A(:,:) 1164 complex(dpc), allocatable :: sternheimer_B(:,:) 1165 complex(dpc), allocatable :: sternheimer_X(:,:) 1166 complex(dpc), allocatable :: sternheimer_G(:,:) 1167 1168 1169 complex(dpc), allocatable :: dummy_tmp_1(:,:) 1170 complex(dpc), allocatable :: dummy_tmp_2(:,:) 1171 1172 integer, allocatable :: ipiv(:) 1173 1174 1175 1176 complex(dpc),allocatable :: local_Lbasis(:,:) 1177 complex(dpc),allocatable :: local_Lbasis_conjugated(:,:) 1178 1179 complex(dpc),allocatable :: YL(:) 1180 1181 real(dp), allocatable :: psikg_in(:,:), psikg_out(:,:) 1182 1183 real(dp), allocatable :: psik_wrk(:,:), psikg_wrk(:,:), psikb_wrk(:,:) 1184 real(dp), allocatable :: psi_gamma_l1(:,:), psi_gamma_l2(:,:) 1185 1186 real(dp), allocatable :: psikg_valence(:,:) 1187 real(dp), allocatable :: psir_valence(:,:,:,:) 1188 1189 real(dp), allocatable :: psi_rhs(:,:,:) 1190 1191 real(dp), allocatable :: psikg_VL(:,:) 1192 1193 1194 complex(dpc), allocatable :: check_matrix(:,:), check_matrix2(:,:) 1195 complex(dpc), allocatable :: c_sternheimer_solutions(:,:) 1196 complex(dpc), allocatable :: QR_orthonormal_basis(:,:) 1197 1198 complex(dpc), allocatable :: svd_matrix(:,:) 1199 real (dp ), allocatable :: svd_values(:) 1200 1201 1202 integer :: iblk_lanczos, nbdblock_lanczos 1203 integer :: iblk_solutions, nbdblock_solutions 1204 integer :: mb 1205 1206 character(128) :: filename 1207 logical :: file_exists 1208 integer :: io_unit 1209 1210 character(128) :: filename_log 1211 integer :: io_unit_log 1212 1213 1214 real(dp) :: omega 1215 1216 character(256) :: timing_string 1217 real(dp) :: time1, time2 1218 real(dp) :: time_exact 1219 1220 1221 integer :: info 1222 integer :: ierr 1223 1224 real(dp) :: z(2) 1225 1226 1227 logical :: omega_is_imaginary 1228 real(dp) :: omega0 1229 1230 logical :: model 1231 logical :: write_debug 1232 1233 1234 integer :: mpi_communicator, mpi_rank, mpi_group 1235 1236 ! ************************************************************************* 1237 1238 1239 !================================================================================ 1240 ! Prepare MPI information 1241 !================================================================================ 1242 1243 ! for LA configuration ,The processors communicate over band+FFT 1244 mpi_communicator = mpi_enreg%comm_bandfft 1245 1246 ! what is the rank of this processor, within its group? 1247 mpi_rank = mpi_enreg%me_fft 1248 1249 ! Which group does this processor belong to, given the communicator? 1250 mpi_group = mpi_enreg%me_band 1251 1252 1253 1254 1255 !================================================================================ 1256 ! Setup a log file, to keep track of the algorithm 1257 !================================================================================ 1258 1259 1260 write(filename_log,'(A,I4.4,A)') 'ProjectedSternheimerEpsilon_PROC=',mpi_enreg%me,'.log' 1261 1262 io_unit_log = get_unit() 1263 open(io_unit_log,file=filename_log,status=files_status_new) 1264 write(io_unit_log,10) '' 1265 write(io_unit_log,10) '#====================================================================================================' 1266 write(io_unit_log,10) "# ProjectedSternheimerEpsilon: log file " 1267 write(io_unit_log,10) "# ------------------------------------------------------------------- " 1268 write(io_unit_log,10) "# " 1269 write(io_unit_log,10) "# This file tracks the algorithm in the routine ProjectedSternheimerEpsilon. The goal is to " 1270 write(io_unit_log,10) "# establish where the algorithm crashes if it does, and/or to track are far along the code is. " 1271 write(io_unit_log,10) '#' 1272 write(io_unit_log,10) '# MPI data for this process:' 1273 write(io_unit_log,10) '#' 1274 write(io_unit_log,22) '# mpi_rank :',mpi_rank,' (rank of this processor in its band group)' 1275 write(io_unit_log,22) '# mpi_group:',mpi_group,' (band group to which this processor belongs)' 1276 write(io_unit_log,10) '#====================================================================================================' 1277 flush(io_unit_log) 1278 1279 1280 !================================================================================ 1281 ! Setup timing; prepare arrays 1282 !================================================================================ 1283 1284 write(io_unit_log,10) " - Preparing and allocating arrays ...." 1285 flush(io_unit_log) 1286 1287 1288 1289 timing_string = "#" 1290 call write_text_block_in_Timing_log(timing_string) 1291 timing_string = "# ProjectedSternheimerEpsilon " 1292 call write_text_block_in_Timing_log(timing_string) 1293 timing_string = "#" 1294 call write_text_block_in_Timing_log(timing_string) 1295 1296 ! Allocate the module array 1297 ABI_ALLOCATE(projected_dielectric_Lanczos_basis, (lmax,lmax,npt_gauss+1)) 1298 1299 projected_dielectric_Lanczos_basis(:,:,:) = cmplx_0 1300 1301 ! initialize zero frequency with exact solution 1302 do l = 1, lmax 1303 projected_dielectric_Lanczos_basis(l,l,1) = cmplx_1*epsilon_eigenvalues_0(l) 1304 end do 1305 1306 1307 ! initialize other frequencies with the identity 1308 do iw = 2, npt_gauss + 1 1309 do l = 1, lmax 1310 projected_dielectric_Lanczos_basis(l,l,iw) = cmplx_1 1311 end do 1312 end do 1313 1314 time_exact = zero 1315 1316 1317 !================================================================================ 1318 ! Parallelisation of the code is subtle; Hamiltonian must act over 1319 ! FFT rows, we must be careful with memory, etc... 1320 ! 1321 ! We will parallelise in block of Lanczos vectors, not over bands. 1322 !================================================================================ 1323 1324 ! Number of blocks of lanczos vectors 1325 nbdblock_lanczos = lmax/blocksize 1326 if (modulo(lmax,blocksize) /= 0) nbdblock_lanczos = nbdblock_lanczos + 1 1327 1328 1329 if (present(use_model)) then 1330 model = use_model 1331 else 1332 model = .true. 1333 end if 1334 1335 if (model) then 1336 ! Prepare the array that will contain the matrix elements of the model operator 1337 ABI_ALLOCATE(model_dielectric_Lanczos_basis, (lmax,lmax,npt_gauss+1)) 1338 1339 ! initialize with zero. NOT with the identity, in order to avoid extra communications 1340 ! (see below) 1341 model_dielectric_Lanczos_basis(:,:,:) = cmplx_0 1342 1343 end if 1344 1345 ! various working arrays 1346 1347 ABI_ALLOCATE(psikg_valence ,(2,npw_g)) 1348 ABI_ALLOCATE(psir_valence ,(2,n4,n5,n6)) 1349 1350 1351 ABI_ALLOCATE(psikg_VL ,(2,npw_g)) 1352 1353 1354 ABI_ALLOCATE(psik_wrk ,(2,npw_k)) 1355 ABI_ALLOCATE(psikb_wrk ,(2,npw_kb)) 1356 ABI_ALLOCATE(psikg_wrk ,(2,npw_g)) 1357 1358 1359 ABI_ALLOCATE(psi_gamma_l1 ,(2,npw_k)) 1360 ABI_ALLOCATE(psi_gamma_l2 ,(2,npw_k)) 1361 1362 ABI_ALLOCATE(psi_rhs ,(2,npw_k,lmax)) 1363 1364 1365 ABI_ALLOCATE(psikg_in ,(2,npw_g)) 1366 ABI_ALLOCATE(psikg_out ,(2,npw_g)) 1367 1368 1369 ! maximal possible dimension of the solution space 1370 ! +1 because the solutions at $\omega=\infty$ are free. 1371 ! +1 if recycling is activated, because the solutions at $\omega=0$ are then available. 1372 i=1 1373 if(dtset%gwls_recycle == 1 .or. dtset%gwls_recycle == 2) then 1374 i=2 1375 end if 1376 lsolutions_max = lmax*(nfrequencies+i) 1377 1378 1379 ABI_ALLOCATE(local_Lbasis, (npw_k,lmax)) 1380 ABI_ALLOCATE(local_Lbasis_conjugated,(npw_k,lmax)) 1381 ABI_ALLOCATE(YL,(npw_k)) 1382 1383 ABI_ALLOCATE(c_sternheimer_solutions,(npw_k,lsolutions_max)) 1384 ABI_ALLOCATE(QR_orthonormal_basis ,(npw_k,lsolutions_max)) 1385 1386 1387 omega_is_imaginary = .true. 1388 1389 1390 ABI_ALLOCATE(svd_matrix,(npw_k,lsolutions_max)) 1391 ABI_ALLOCATE(svd_values,(lsolutions_max)) 1392 1393 1394 ! Prepare files for writing 1395 write_debug = debug .and. mpi_enreg%me == 0 1396 1397 if ( write_debug ) then 1398 1399 write(filename,'(A)') "ProjectedSternheimerEpsilon.log" 1400 inquire(file=filename,exist=file_exists) 1401 1402 i = 0 1403 do while (file_exists) 1404 i = i+1 1405 write (filename,'(A,I0.4,A)') "ProjectedSternheimerEpsilon_",i,".log" 1406 inquire(file=filename,exist=file_exists) 1407 end do 1408 1409 1410 io_unit = get_unit() 1411 open(io_unit,file=filename,status=files_status_new) 1412 write(io_unit,10) '' 1413 write(io_unit,10) '#====================================================================================================' 1414 write(io_unit,10) "# Building the dielectic matrix using projected Sternheimer equation " 1415 write(io_unit,10) "# ------------------------------------------------------------------- " 1416 write(io_unit,10) "# " 1417 write(io_unit,10) '# This file contains some tests to check if the various elements entering the projected ' 1418 write(io_unit,10) '# dielectric matrix have the right properties. At this point, this is mostly for debugging.' 1419 write(io_unit,10) '# The wavefunctions and other related arrays are stored in reciprocal space.' 1420 write(io_unit,10) '#' 1421 write(io_unit,10) '#====================================================================================================' 1422 write(io_unit,10) '' 1423 flush(io_unit) 1424 end if 1425 1426 if (debug) then 1427 ABI_ALLOCATE(check_matrix ,(lsolutions_max,lsolutions_max)) 1428 ABI_ALLOCATE(check_matrix2,(lsolutions_max,lsolutions_max)) 1429 end if 1430 1431 !================================================================================ 1432 ! Loop on all valence bands 1433 !================================================================================ 1434 1435 1436 1437 1438 do v = 1, nbandv 1439 write(io_unit_log,10) '#====================================================================================================' 1440 write(io_unit_log,20) '# valence band index:', v 1441 write(io_unit_log,10) '#====================================================================================================' 1442 flush(io_unit_log) 1443 1444 1445 1446 1447 if ( write_debug ) then 1448 write(io_unit,10) '#====================================================================================================' 1449 write(io_unit,20) '# valence band index:', v 1450 write(io_unit,10) '#====================================================================================================' 1451 flush(io_unit) 1452 end if 1453 1454 write(io_unit_log,10) ' - Fourier transform valence state ...' 1455 flush(io_unit_log) 1456 1457 1458 ! copy pre-calculated valence state in this covenient local array 1459 psikg_valence(:,:) = kernel_wavefunctions_FFT(:,:,v) 1460 1461 ! compute fourier transform of valence state, and conjugate 1462 call g_to_r(psir_valence,psikg_valence) 1463 psir_valence(2,:,:,:) = -psir_valence(2,:,:,:) 1464 1465 ! loop on all blocks of lanczos vectors 1466 write(io_unit_log,10) ' - Loop on all lanczos blocks to generate modified basis and Sternheimer RHS:' 1467 flush(io_unit_log) 1468 do iblk_lanczos = 1, nbdblock_lanczos 1469 !-------------------------------------------------------------------------- 1470 ! Below, we build the modified basis, [ (V^{1/2} l)^* . psi_v], 1471 ! as well as conjugated, projected form, Pc . [ (V^{1/2} l) . psi_v^*]. 1472 ! 1473 ! It is very irritating to have to do it this way, but I don't 1474 ! see an alternative; see discussion below. 1475 ! 1476 !-------------------------------------------------------------------------- 1477 1478 write(io_unit_log,23) ' iblk_lanczos = ',iblk_lanczos," / ",nbdblock_lanczos 1479 1480 1481 write(io_unit_log,10) ' -- Prepare modified basis computation...' 1482 flush(io_unit_log) 1483 1484 1485 1486 ! loop on all states within this block 1487 do mb = 1, blocksize 1488 1489 ! Determine the index of the Lanczos vector 1490 l = (iblk_lanczos-1)*blocksize + mb 1491 1492 ! take a single lanczos vector 1493 1494 if ( l <= lmax) then 1495 psik_wrk(1,:) = dble (Lbasis_lanczos(:,l)) 1496 psik_wrk(2,:) = dimag(Lbasis_lanczos(:,l)) 1497 else 1498 psik_wrk(:,:) = zero 1499 end if 1500 1501 1502 ! Apply coulomb potential 1503 call sqrt_vc_k(psik_wrk) 1504 1505 ! Store in array of blocks of wavefunctions 1506 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:) 1507 1508 end do ! mb 1509 1510 ! Transform to FFT representation 1511 call wf_block_distribute(psikb_wrk, psikg_VL,1) ! LA -> FFT 1512 1513 ! psikg_VL now contains | V^1/2 . l >, in FFT configuration 1514 1515 1516 !---------------------------------------------------------- 1517 ! i) Compute the modified basis 1518 !---------------------------------------------------------- 1519 write(io_unit_log,10) ' -- compute modified basis ...' 1520 flush(io_unit_log) 1521 1522 1523 1524 ! Fourier transform to real space, and conjugate (psir1 is a global work array) 1525 call g_to_r(psir1,psikg_VL) 1526 psir1(2,:,:,:) = -psir1(2,:,:,:) ! IS THIS STACK-DANGEROUS? 1527 1528 ! Compute the real space product, and return to k space, in FFT configuration 1529 call gr_to_g(psikg_wrk,psir1,psikg_valence) 1530 1531 ! psikg_wrk contains | (V^1/2 . l)^* phi_v >, in FFT configuration 1532 1533 ! return to LA representation 1534 call wf_block_distribute(psikb_wrk, psikg_wrk,2) ! FFT -> LA 1535 1536 ! store data, in LA representation 1537 do mb = 1, blocksize 1538 l = (iblk_lanczos-1)*blocksize + mb 1539 1540 if ( l <= lmax) then 1541 ! local_Lbasis 1542 psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 1543 local_Lbasis(:,l) = cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:) 1544 end if 1545 1546 end do !mb 1547 1548 !-------------------------------------------------------------------------- 1549 ! ii) Build the Sternheimer coefficients, at various frequencies, 1550 ! to define the Sternheimer basis. 1551 !-------------------------------------------------------------------------- 1552 write(io_unit_log,20) ' -- Compute Sternheimer RHS...' 1553 flush(io_unit_log) 1554 1555 1556 ! psikg_wrk still contains | (V^1/2 . l)^* phi_v >, in FFT configuration 1557 1558 ! Create right-hand-side of Sternheimer equation, in FFT configuration 1559 call pc_k_valence_kernel(psikg_wrk) 1560 call Hpsik(psikg_in,psikg_wrk,eig(v)) 1561 call pc_k_valence_kernel(psikg_in) 1562 psikg_in(:,:) = -psikg_in(:,:) ! IS THIS STACK-DANGEROUS? 1563 1564 ! return RHS to LA representation, for explicit storage 1565 call wf_block_distribute(psikb_wrk, psikg_in,2) ! FFT -> LA 1566 1567 ! store data, in LA representation 1568 do mb = 1, blocksize 1569 l = (iblk_lanczos-1)*blocksize + mb 1570 1571 if ( l <= lmax) then 1572 ! psi_rhs 1573 psi_rhs(:,:,l) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 1574 end if 1575 1576 end do !mb 1577 1578 !---------------------------------------------------------- 1579 ! iii) extract solutions for all projection frequencies 1580 !---------------------------------------------------------- 1581 write(io_unit_log,20) ' -- Extract solutions for all projection frequencies...' 1582 flush(io_unit_log) 1583 1584 1585 1586 do iw = 1, nfrequencies 1587 1588 omega0 = list_projection_frequencies(iw) 1589 ! Solve Sternheimer equation 1590 1591 projection = 0 1592 if(omega0 < 1d-12) projection=1 1593 1594 ! solve A x = b, over the whole lanczos block 1595 call sqmr(psikg_in, psikg_out, eig(v), projection, omega0, omega_is_imaginary) 1596 1597 ! return LA representation, for explicit storage 1598 call wf_block_distribute(psikb_wrk, psikg_out, 2) ! FFT -> LA 1599 1600 do mb = 1, blocksize 1601 l = (iblk_lanczos-1)*blocksize + mb 1602 1603 if ( l <= lmax) then 1604 ls = (l-1)*nfrequencies+iw 1605 1606 psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 1607 1608 c_sternheimer_solutions(:,ls)= cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:) 1609 end if 1610 1611 end do ! mb 1612 1613 end do ! iw 1614 1615 !---------------------------------------------------------- 1616 ! iv) Compute the conjugated, projected modified basis 1617 !---------------------------------------------------------- 1618 1619 write(io_unit_log,20) ' -- compute the conjugated, projected modified basis...' 1620 flush(io_unit_log) 1621 1622 1623 1624 1625 ! Compute the real space product, | (V^1/2. l) phi_v^* > and return to k space, in FFT configuration 1626 call gr_to_g(psikg_wrk, psir_valence, psikg_VL) 1627 1628 ! project on conduction states 1629 call pc_k_valence_kernel(psikg_wrk) 1630 1631 ! return to LA representation 1632 call wf_block_distribute(psikb_wrk, psikg_wrk,2) ! FFT -> LA 1633 1634 ! store back, in LA configuration 1635 do mb = 1, blocksize 1636 l = (iblk_lanczos-1)*blocksize + mb 1637 1638 if ( l <= lmax) then 1639 psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 1640 local_Lbasis_conjugated(:,l) = cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:) 1641 end if 1642 1643 end do !mb 1644 1645 end do ! iblk_lanczos 1646 1647 1648 1649 write(io_unit_log,10) ' - Read in solutions at w=0 and/or w = infinity, if appropriate...' 1650 flush(io_unit_log) 1651 1652 ! Begin with the storage of the solutions at $\omega = 0.$, which are free. 1653 if(dtset%gwls_recycle == 1) then 1654 c_sternheimer_solutions(:,lsolutions_max-2*lmax+1:lsolutions_max-lmax) = cmplx_1*Sternheimer_solutions_zero(1,:,:,v) + & 1655 & cmplx_i*Sternheimer_solutions_zero(2,:,:,v) 1656 end if 1657 if(dtset%gwls_recycle == 2) then 1658 do i=1,lmax 1659 recy_i = (i-1)*nbandv + v 1660 !BUG : On petrus, NAG 5.3.1 + OpenMPI 1.6.2 cause read(...,rec=i) to read the data written by write(...,rec=i+1). 1661 read(recy_unit,rec=recy_i) psik_wrk 1662 c_sternheimer_solutions(:,lsolutions_max-2*lmax+i) = cmplx_1*psik_wrk(1,:) + cmplx_i*psik_wrk(2,:) 1663 end do 1664 end if 1665 1666 ! and then continue with the storage of the vectors on which the Sternheimer solutions will be projected. 1667 c_sternheimer_solutions(:,lsolutions_max-lmax+1:lsolutions_max) = cmplx_1*psi_rhs(1,:,:) + cmplx_i*psi_rhs(2,:,:) 1668 ! Previously was = local_Lbasis; but analysis in the Lanczos article reveals psi_rhs should be better. 1669 ! Furthermore, tests show that, with psi_rhs, silane@1Ha has Sigma_c 0.01mHa away from the result with 1670 ! gwls_list_proj_freq 0.0 1.0, in contrast with local_Lbasis, which has Sigma_c 0.3mHa away from the same result. 1671 1672 if ( model ) then 1673 1674 write(io_unit_log,10) ' - USE MODEL: model = .true., hence compute model model dielectric matrix...' 1675 flush(io_unit_log) 1676 1677 1678 1679 !-------------------------------------------------------------------------- 1680 ! Now that we have the modified basis, compute the matrix 1681 ! elements of the model dielectric operator 1682 ! 1683 ! CAREFUL! 1684 ! 1685 ! The model is given by 1686 ! 1687 ! P_model(iw) = sum_{v} phi_v(r) P_c.Y(iw).P_c phi_v^*(r') 1688 ! 1689 ! such that 1690 ! 1691 ! <l1 | eps_model(iw) | l2 > = delta_{l1,l2} 1692 ! - sum_{v} < (V^{1/2}.l1).phi_v^*| Pc . Y . Pc | (V^{1/2}.l2).phi_v^* > 1693 ! 1694 ! But local_Lbasis defined above corresponds to 1695 ! Pc | (V^{1/2} .l )^* phi_v >. 1696 ! 1697 ! This is why we must define local_Lbasis_conjugated, of the form 1698 ! Pc | (V^{1/2} .l ) phi_v^* >. 1699 ! 1700 !-------------------------------------------------------------------------- 1701 1702 do iw = 1, npt_gauss+1 1703 1704 call setup_Pk_model(list_omega(iw),second_model_parameter) 1705 1706 ! Only build the lower triangular part; the upper triangular part is obtained from the Hermitian conjugate 1707 do l1 = 1, lmax 1708 1709 YL(:) = model_Y_LA(:)*local_Lbasis_conjugated(:,l1) 1710 do l2 = 1, l1 1711 model_dielectric_Lanczos_basis(l1,l2,iw) = model_dielectric_Lanczos_basis(l1,l2,iw) & 1712 -complex_vector_product(YL, local_Lbasis_conjugated(:,l2),npw_k) 1713 1714 end do 1715 end do 1716 1717 end do ! iw 1718 1719 1720 end if 1721 1722 1723 !-------------------------------------------------------------------------- 1724 ! Check explicitly that solutions satisfy the Sternheimer equations 1725 !-------------------------------------------------------------------------- 1726 1727 if ( debug ) then 1728 1729 if (write_debug) then 1730 write(io_unit,10) "#--------------------------------------------------------------------------------" 1731 write(io_unit,10) "# Check explicitly that solutions satisfy the Sternheimer equation. " 1732 write(io_unit,10) "# " 1733 write(io_unit,10) "# Define: " 1734 write(io_unit,10) "# E_l = || (omega^2+[H-Ev]^2) |phi_l> + Pc.[H-Ev].Pc |(V^1/2.q_l)^*.phi_v > || " 1735 write(io_unit,10) "#--------------------------------------------------------------------------------" 1736 write(io_unit,10) '# l Im[omega] (Ha) E_l' 1737 write(io_unit,10) "#--------------------------------------------------------------------------------" 1738 flush(io_unit) 1739 end if 1740 1741 1742 do iblk_lanczos = 1, nbdblock_lanczos 1743 1744 ! loop on all states within this block 1745 do mb = 1, blocksize 1746 1747 l = (iblk_lanczos-1)*blocksize + mb 1748 1749 1750 if ( l <= lmax) then 1751 ! psik_wrk = | (V^{1/2} .l )^* phi_v > 1752 psik_wrk(1,:) = dble (local_Lbasis(:,l)) 1753 psik_wrk(2,:) = dimag(local_Lbasis(:,l)) 1754 else 1755 psik_wrk(:,:) = zero 1756 end if 1757 1758 ! Store in array of blocks of wavefunctions 1759 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:) 1760 end do ! mb 1761 1762 ! Transform to FFT representation 1763 call wf_block_distribute(psikb_wrk, psikg_wrk, 1) ! LA -> FFT 1764 1765 ! Create right-hand-side of Sternheimer equation 1766 call pc_k_valence_kernel(psikg_wrk) 1767 call Hpsik(psikg_in,psikg_wrk,eig(v)) 1768 call pc_k_valence_kernel(psikg_in) 1769 psikg_in(:,:) = -psikg_in(:,:) ! IS THIS STACK-DANGEROUS? 1770 1771 do iw = 1, nfrequencies 1772 ! loop on all states within this block 1773 do mb = 1, blocksize 1774 1775 l = (iblk_lanczos-1)*blocksize + mb 1776 1777 ls = (l-1)*nfrequencies+iw 1778 1779 if ( l <= lmax) then 1780 psik_wrk(1,:) = dble (c_sternheimer_solutions(:,ls)) 1781 psik_wrk(2,:) = dimag(c_sternheimer_solutions(:,ls)) 1782 else 1783 psik_wrk(:,:) = zero 1784 end if 1785 1786 ! Store in array of blocks of wavefunctions 1787 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:) 1788 end do 1789 1790 ! Transform to FFT representation 1791 call wf_block_distribute(psikb_wrk, psikg_wrk, 1) ! LA -> FFT 1792 1793 1794 omega0 = list_projection_frequencies(iw) 1795 1796 psikg_out(:,:) = omega0**2*psikg_wrk(:,:) 1797 1798 call Hpsik(psikg_wrk,cte=eig(v)) 1799 call Hpsik(psikg_wrk,cte=eig(v)) 1800 1801 1802 psikg_out(:,:) = psikg_out(:,:) + psikg_wrk(:,:)-psikg_in(:,:) 1803 ! psikg_out now contains [ w0^2 + [H-epsilon_v]^2 ] | x > - |RHS>, in FFT configuration. 1804 1805 ! bring it back to LA configuration 1806 1807 ! Transform to FFT representation 1808 call wf_block_distribute(psikb_wrk, psikg_out, 2) ! FFT -> LA 1809 1810 do mb = 1, blocksize 1811 l = (iblk_lanczos-1)*blocksize + mb 1812 1813 if ( l <= lmax) then 1814 psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 1815 1816 z(:) = cg_zdotc(npw_k ,psik_wrk, psik_wrk) 1817 1818 call xmpi_sum(z, mpi_communicator,ierr) ! sum on all processors working on FFT! 1819 if (write_debug) write(io_unit,21) l, omega0, sqrt(z(1)) 1820 end if 1821 end do ! mb 1822 1823 end do ! iw 1824 end do ! iblk_lanczos 1825 1826 end if 1827 1828 !-------------------------------------------------------------------------- 1829 ! Step 5: Perform a singular value decomposition to extract a 1830 ! linearly independent basis for the solution space. 1831 ! 1832 !-------------------------------------------------------------------------- 1833 write(io_unit_log,10) ' - Perform SVD to extract linearly independent basis to Sternheimer equation...' 1834 flush(io_unit_log) 1835 1836 1837 1838 1839 svd_matrix(:,:) = c_sternheimer_solutions(:,:) 1840 1841 call extract_SVD(mpi_communicator, npw_k,lsolutions_max,svd_matrix,svd_values) 1842 1843 if ( write_debug ) then 1844 write(io_unit,10) "#--------------------------------------------------------------------------------" 1845 write(io_unit,10) "# Check the singular value decomposition of the arrays" 1846 write(io_unit,10) '# l svd' 1847 write(io_unit,10) "#--------------------------------------------------------------------------------" 1848 flush(io_unit) 1849 end if 1850 1851 lsolutions = 0 1852 QR_orthonormal_basis(:,:) = cmplx_0 1853 do l=1, lsolutions_max 1854 if (svd_values(l) > svd_tolerance ) then 1855 lsolutions = lsolutions + 1 1856 1857 if ( write_debug ) then 1858 write(io_unit,14) l,svd_values(l) 1859 flush(io_unit) 1860 end if 1861 QR_orthonormal_basis(:,l) = svd_matrix(:,l) 1862 1863 else 1864 if ( write_debug ) then 1865 write(io_unit,15) l,svd_values(l),' SVD value too small! Vector to be discarded!' 1866 flush(io_unit) 1867 end if 1868 end if 1869 end do 1870 1871 !-------------------------------------------------------------------------- 1872 ! Step 6: project all relevant arrays onto the newly defined orthonormal 1873 ! basis. 1874 !-------------------------------------------------------------------------- 1875 write(io_unit_log,10) ' - Compute the B matrix...' 1876 flush(io_unit_log) 1877 1878 if (debug) then 1879 check_matrix(:,:) = cmplx_0 1880 do l = 1, lsolutions 1881 check_matrix(l,l) = -cmplx_1 1882 end do 1883 end if 1884 1885 ABI_ALLOCATE(ipiv ,(lsolutions)) 1886 ABI_ALLOCATE(sternheimer_A0 ,(lsolutions,lsolutions)) 1887 ABI_ALLOCATE(sternheimer_B ,(lsolutions,lmax)) 1888 ABI_ALLOCATE(sternheimer_G ,(lmax,lsolutions)) 1889 1890 ! Compute the X matrix and the check_matrix 1891 do l1 = 1, lsolutions 1892 1893 psi_gamma_l1(1,:) = real (QR_orthonormal_basis(:,l1)) 1894 psi_gamma_l1(2,:) = dimag(QR_orthonormal_basis(:,l1)) 1895 1896 do l2 = 1, lsolutions 1897 1898 if (l2 <= lmax) then 1899 z(:) = cg_zdotc(npw_k, psi_gamma_l1, psi_rhs(:,:,l2)) 1900 call xmpi_sum(z, mpi_communicator,ierr) ! sum on all processors for LA configuration 1901 1902 sternheimer_B(l1,l2) = cmplx_1*z(1)+cmplx_i*z(2) 1903 end if 1904 1905 if (debug) then 1906 psi_gamma_l2(1,:) = dble (QR_orthonormal_basis(:,l2)) 1907 psi_gamma_l2(2,:) = dimag(QR_orthonormal_basis(:,l2)) 1908 1909 z(:) = cg_zdotc(npw_k, psi_gamma_l1, psi_gamma_l2) 1910 call xmpi_sum(z, mpi_communicator,ierr) ! sum on all processors 1911 1912 check_matrix(l1,l2) = check_matrix(l1,l2) + cmplx_1*z(1)+cmplx_i*z(2) 1913 end if 1914 1915 end do ! l2 1916 end do ! l1 1917 1918 1919 ! Number of blocks of solution vectors 1920 nbdblock_solutions = lsolutions/blocksize 1921 1922 if (modulo(lsolutions,blocksize) /= 0) nbdblock_solutions = nbdblock_solutions + 1 1923 1924 1925 write(io_unit_log,10) ' - Compute the A0 matrix...' 1926 flush(io_unit_log) 1927 1928 ! Compute the A matrix 1929 do iblk_solutions =1, nbdblock_solutions 1930 1931 do mb = 1, blocksize 1932 l2 = (iblk_solutions-1)*blocksize + mb 1933 1934 if ( l2 <= lsolutions) then 1935 psik_wrk(1,:) = dble (QR_orthonormal_basis(:,l2)) 1936 psik_wrk(2,:) = dimag(QR_orthonormal_basis(:,l2)) 1937 else 1938 psik_wrk(:,:) = zero 1939 end if 1940 1941 ! Store in array of blocks of wavefunctions 1942 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:) 1943 end do 1944 1945 ! Transform to FFT representation 1946 call wf_block_distribute(psikb_wrk, psikg_wrk,1) ! LA -> FFT 1947 1948 ! act twice with the Hamiltonian operator 1949 call Hpsik(psikg_out,psikg_wrk,eig(v)) 1950 call Hpsik(psikg_out,cte=eig(v)) 1951 1952 ! return to LA representation 1953 call wf_block_distribute(psikb_wrk, psikg_out,2) ! FFT -> LA 1954 1955 do mb = 1, blocksize 1956 l2 = (iblk_solutions-1)*blocksize + mb 1957 1958 if ( l2 <= lsolutions) then 1959 psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 1960 else 1961 psik_wrk(:,:) = zero 1962 end if 1963 1964 do l1 = 1, lsolutions 1965 1966 psi_gamma_l1(1,:) = real (QR_orthonormal_basis(:,l1)) 1967 psi_gamma_l1(2,:) = dimag(QR_orthonormal_basis(:,l1)) 1968 1969 z(:) = cg_zdotc(npw_k, psi_gamma_l1, psik_wrk) 1970 call xmpi_sum(z,mpi_communicator,ierr) ! sum on all processors working on FFT! 1971 1972 1973 if ( l2 <= lsolutions ) then 1974 sternheimer_A0(l1,l2) = cmplx_1*z(1)+cmplx_i*z(2) 1975 end if 1976 1977 1978 end do ! l1 1979 end do ! mb 1980 end do ! iblk_solutions 1981 1982 1983 if (debug) then 1984 ! HERE we use a dummy variable to avoid operations which might blow the stack! 1985 ! Stack overflows lead to hard-to-find bugs; let's avoid putting them in here. 1986 ABI_ALLOCATE(dummy_tmp_1,(lsolutions,lsolutions)) 1987 ABI_ALLOCATE(dummy_tmp_2,(lsolutions,lsolutions)) 1988 1989 1990 dummy_tmp_1(:,:)= transpose(sternheimer_A0(:,:)) 1991 dummy_tmp_2(:,:)= conjg(dummy_tmp_1(:,:)) 1992 1993 check_matrix2(:,:) = sternheimer_A0(:,:)-dummy_tmp_2(:,:) 1994 1995 ABI_DEALLOCATE(dummy_tmp_1) 1996 ABI_DEALLOCATE(dummy_tmp_2) 1997 end if 1998 1999 write(io_unit_log,10) ' - Compute the GAMMA matrix...' 2000 flush(io_unit_log) 2001 2002 2003 ! Compute the GAMMA matrices 2004 do l1 = 1, lmax 2005 2006 ! psik_wrk = | (V^{1/2} . l )^* phi_v > 2007 psik_wrk(1,:) = dble (local_Lbasis(:,l1) ) 2008 psik_wrk(2,:) = dimag(local_Lbasis(:,l1)) 2009 2010 2011 do l2 = 1, lsolutions 2012 2013 psi_gamma_l2(1,:) = real (QR_orthonormal_basis(:,l2)) 2014 psi_gamma_l2(2,:) = dimag(QR_orthonormal_basis(:,l2)) 2015 2016 2017 ! Note that G_{lJ} = < l | Vc^{1/2}.(gamma_J^*.phi_v)> 2018 ! = < gamma_J | (Vc^{1/2}.l^*).phi_v > 2019 2020 z(:) = cg_zdotc(npw_k,psi_gamma_l2, psik_wrk) 2021 call xmpi_sum(z,mpi_communicator,ierr) ! sum on all processors working on FFT! 2022 sternheimer_G(l1,l2) = cmplx_1*z(1)+cmplx_i*z(2) 2023 2024 end do ! l1 2025 end do ! l2 2026 2027 2028 2029 if ( write_debug ) then 2030 write(io_unit,19) '<gamma| gamma>', sqrt(sum(abs(check_matrix(:,:))**2)) 2031 write(io_unit,19) ' M hermitian ',sqrt(sum(abs(check_matrix2(:,:))**2)) 2032 write(io_unit,10) " " 2033 write(io_unit,10) "# GAMMA Matrix:" 2034 write(io_unit,10) " " 2035 2036 do l1 = 1, lmax 2037 write(io_unit,30) sternheimer_G(l1,:) 2038 end do 2039 flush(io_unit) 2040 end if 2041 2042 !-------------------------------------------------------------------------- 2043 ! Step 7: Compute the solutions 2044 !-------------------------------------------------------------------------- 2045 2046 write(io_unit_log,10) ' - Compute the Projected Sternheimer solutions and build approximate dielectric operator...' 2047 flush(io_unit_log) 2048 2049 2050 2051 ABI_ALLOCATE(sternheimer_A ,(lsolutions,lsolutions)) 2052 ABI_ALLOCATE(sternheimer_X ,(lsolutions,lmax)) 2053 do iw = 2, npt_gauss + 1 2054 2055 write(io_unit_log,23) ' -- iw = ',iw,' / ',npt_gauss+1 2056 flush(io_unit_log) 2057 2058 2059 omega = list_omega(iw) 2060 2061 sternheimer_A(:,:) = sternheimer_A0(:,:) 2062 2063 do l = 1, lsolutions 2064 sternheimer_A(l,l) = sternheimer_A(l,l) + omega**2 2065 end do 2066 2067 sternheimer_X(:,:) = sternheimer_B(:,:) 2068 2069 !-------------------------------------------------------------------------- 2070 ! Step 2: solve A*X = B, a projected form of the Sternheimer equation 2071 !-------------------------------------------------------------------------- 2072 write(io_unit_log,10) ' -- Solve A*X = B' 2073 flush(io_unit_log) 2074 2075 2076 2077 call cpu_time(time1) 2078 call zgesv(lsolutions, & ! number of rows of A matrix 2079 lmax, & ! number of columns of B matrix 2080 sternheimer_A, & ! The A matrix on input, the LU factorization on output 2081 lsolutions, & ! leading dimension of A 2082 ipiv, & ! array of pivots 2083 sternheimer_X, & ! B matrix on input, solution X on output 2084 lsolutions, & ! leading dimension of B 2085 info ) 2086 call cpu_time(time2) 2087 2088 time_exact = time_exact + time2-time1 2089 2090 !-------------------------------------------------------------------------- 2091 ! Step 3: Add contribution to projected epsilon 2092 !-------------------------------------------------------------------------- 2093 ! perform E = E -4 sum_l Gamma_v*X^*_v 2094 2095 write(io_unit_log,10) ' -- add contribution to dielectric matrix at this frequency' 2096 flush(io_unit_log) 2097 2098 2099 2100 ABI_ALLOCATE(dummy_tmp_1,(lsolutions,lmax)) 2101 dummy_tmp_1(:,:) = conjg(sternheimer_X) ! DO THIS to avoid potential stack problems 2102 2103 call cpu_time(time1) 2104 call zgemm( 'N', & ! A matrix is in normal order 2105 'N', & ! B matrix is in normal order 2106 lmax, & ! number of rows of A 2107 lmax, & ! number of columns of B 2108 lsolutions, & ! number of columns of A 2109 -4.0_dp*cmplx_1, & ! premultiply A*B by this scalar 2110 sternheimer_G, & ! GAMMA matrix 2111 lmax, & ! leading dimension of A 2112 dummy_tmp_1, & ! B matrix 2113 lsolutions, & ! leading dimension of B 2114 cmplx_1, & ! beta is one 2115 projected_dielectric_Lanczos_basis(:,:,iw), & ! C matrix 2116 lmax) ! leading dimension of C 2117 2118 ABI_DEALLOCATE(dummy_tmp_1) 2119 2120 call cpu_time(time2) 2121 time_exact = time_exact + time2-time1 2122 2123 end do ! iw 2124 2125 write(io_unit_log,10) ' - Deallocate tmp arrays...' 2126 flush(io_unit_log) 2127 2128 2129 2130 2131 ABI_DEALLOCATE(ipiv ) 2132 ABI_DEALLOCATE(sternheimer_A ) 2133 ABI_DEALLOCATE(sternheimer_A0 ) 2134 ABI_DEALLOCATE(sternheimer_B ) 2135 ABI_DEALLOCATE(sternheimer_X ) 2136 ABI_DEALLOCATE(sternheimer_G ) 2137 2138 end do ! v 2139 2140 !-------------------------------------------------------------------------- 2141 ! Finalize, post v loop 2142 !-------------------------------------------------------------------------- 2143 write(io_unit_log,10) " - Finalize, after band iterations...." 2144 flush(io_unit_log) 2145 2146 2147 2148 2149 2150 timing_string = "# Exact Sector : " 2151 call write_timing_log(timing_string,time_exact) 2152 2153 2154 ABI_ALLOCATE(dummy_tmp_1,(lmax,lmax)) 2155 ABI_ALLOCATE(dummy_tmp_2,(lmax,lmax)) 2156 2157 do iw = 2, npt_gauss+1 2158 ! finally, make sure matrix is hermitian 2159 2160 ! play this little game to avoid STACK problems 2161 dummy_tmp_1(:,:) = 0.5_dp*transpose(projected_dielectric_Lanczos_basis(:,:,iw)) 2162 dummy_tmp_2(:,:) = conjg(dummy_tmp_1(:,:)) 2163 dummy_tmp_1(:,:) = dummy_tmp_2(:,:) +0.5_dp*projected_dielectric_Lanczos_basis(:,:,iw) 2164 2165 projected_dielectric_Lanczos_basis(:,:,iw) = dummy_tmp_1(:,:) 2166 end do 2167 2168 ABI_DEALLOCATE(dummy_tmp_1) 2169 ABI_DEALLOCATE(dummy_tmp_2) 2170 2171 2172 2173 if ( write_debug ) then 2174 !write some results to a file 2175 2176 write(io_unit,10) '#====================================================================================================' 2177 write(io_unit,10) "# Projected dielectric matrices " 2178 write(io_unit,10) "# ------------------------------------------------------- " 2179 write(io_unit,10) "# This file contains the various projected dielectric matrices as a function of frequency. " 2180 write(io_unit,10) '#====================================================================================================' 2181 write(io_unit,10) '' 2182 flush(io_unit) 2183 2184 do iw = 1, npt_gauss+1 2185 write(io_unit,10) "#" 2186 write(io_unit,12) "# omega = ",list_omega(iw), " i Ha" 2187 write(io_unit,10) "#" 2188 2189 do l =1, lmax 2190 write(io_unit,30) projected_dielectric_Lanczos_basis(l,:,iw) 2191 end do 2192 end do 2193 2194 end if 2195 2196 2197 if ( model ) then 2198 2199 2200 ! Add all components on the processors 2201 call xmpi_sum(model_dielectric_Lanczos_basis,mpi_communicator,ierr) ! sum on all processors 2202 2203 ! add the identity 2204 do iw = 1, npt_gauss+1 2205 do l= 1, lmax 2206 model_dielectric_Lanczos_basis(l,l,iw) = model_dielectric_Lanczos_basis(l,l,iw) + cmplx_1 2207 end do 2208 end do 2209 2210 ! hermitian the operator 2211 do iw = 1, npt_gauss+1 2212 2213 do l1 = 1, lmax 2214 do l2 = 1, l1 2215 ! operator is hermitian 2216 model_dielectric_Lanczos_basis(l2,l1,iw) = conjg(model_dielectric_Lanczos_basis(l1,l2,iw)) 2217 end do 2218 end do 2219 2220 end do ! iw 2221 2222 end if 2223 2224 2225 2226 if ( write_debug ) then 2227 close(io_unit) 2228 end if 2229 2230 write(io_unit_log,10) " - Deallocate and exit...." 2231 flush(io_unit_log) 2232 2233 2234 2235 if (debug) then 2236 ABI_DEALLOCATE(check_matrix) 2237 ABI_DEALLOCATE(check_matrix2) 2238 end if 2239 2240 2241 ABI_DEALLOCATE(psikg_valence) 2242 ABI_DEALLOCATE(psir_valence) 2243 ABI_DEALLOCATE(psikg_VL) 2244 2245 2246 ABI_DEALLOCATE(YL) 2247 2248 ABI_DEALLOCATE(local_Lbasis_conjugated) 2249 ABI_DEALLOCATE(local_Lbasis) 2250 2251 2252 ABI_DEALLOCATE(svd_matrix) 2253 ABI_DEALLOCATE(svd_values) 2254 ABI_DEALLOCATE(psi_gamma_l1) 2255 ABI_DEALLOCATE(psi_gamma_l2) 2256 2257 ABI_DEALLOCATE(psikg_in) 2258 ABI_DEALLOCATE(psikg_out) 2259 2260 ABI_DEALLOCATE(psi_rhs) 2261 2262 ABI_DEALLOCATE(c_sternheimer_solutions) 2263 ABI_DEALLOCATE(QR_orthonormal_basis) 2264 2265 ABI_DEALLOCATE(psik_wrk) 2266 ABI_DEALLOCATE(psikb_wrk) 2267 ABI_DEALLOCATE(psikg_wrk) 2268 2269 2270 close(io_unit_log) 2271 2272 2273 10 format(A) 2274 12 format(A,F12.8,A) 2275 14 format(I5,10X,ES24.12) 2276 15 format(I5,10X,ES24.12,10X,A) 2277 2278 19 format(20X,A,15X,E24.16) 2279 2280 20 format(A,I5) 2281 21 format(I5,10X,F8.4,15X,ES24.12) 2282 22 format(A,I5,A) 2283 23 format(A,I5,A,I5) 2284 30 format(2X,1000(ES12.4,2X,ES12.4,5X)) 2285 2286 end subroutine ProjectedSternheimerEpsilon