TABLE OF CONTENTS


ABINIT/gwls_DielectricArray [ Modules ]

[ Top ] [ Modules ]

NAME

 gwls_DielectricArray

FUNCTION

  .

COPYRIGHT

 Copyright (C) 2009-2018 ABINIT group (JLJ, BR, MC)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

PARENTS

CHILDREN

SOURCE

21 #if defined HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include "abi_common.h"
26 
27 
28 module gwls_DielectricArray
29 !----------------------------------------------------------------------------------------------------
30 ! This module generates and stores the arrays
31 !
32 !        { eps^{-1}(iw)-eps_model^{-1}(iw) }  in Lanczos basis
33 !        { eps_model^{-1}(iw) - 1 }           in model Lanczos basis
34 !
35 ! It makes sense to build these only once, as they do not depend on the external frequency.
36 !
37 !----------------------------------------------------------------------------------------------------
38 
39 ! local modules
40 use m_gwls_utility
41 use gwls_wf
42 use gwls_valenceWavefunctions
43 use gwls_hamiltonian
44 use gwls_lineqsolver
45 use gwls_polarisability
46 use gwls_model_polarisability
47 use gwls_GenerateEpsilon
48 use gwls_TimingLog
49 use gwls_QR_factorization
50 use gwls_LanczosBasis
51 
52 ! abinit modules
53 use defs_basis
54 use m_profiling_abi
55 use m_xmpi
56 use m_cgtools
57 
58 use m_io_tools,            only: get_unit
59 use m_gaussian_quadrature, only: get_frequencies_and_weights_legendre
60 
61 
62 implicit none
63 save
64 
65 private

m_hamiltonian/cleanup_projected_Sternheimer_epsilon [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  cleanup_projected_Sternheimer_epsilon

FUNCTION

  .

INPUTS

OUTPUT

PARENTS

      gwls_ComputeCorrelationEnergy

CHILDREN

      cpu_time,extract_svd,g_to_r,gr_to_g,hpsik,pc_k_valence_kernel
      setup_pk_model,sqmr,sqrt_vc_k,wf_block_distribute
      write_text_block_in_timing_log,write_timing_log,xmpi_sum,zgemm,zgesv

SOURCE

1036 subroutine cleanup_projected_Sternheimer_epsilon
1037 
1038 
1039 !This section has been created automatically by the script Abilint (TD).
1040 !Do not modify the following lines by hand.
1041 #undef ABI_FUNC
1042 #define ABI_FUNC 'cleanup_projected_Sternheimer_epsilon'
1043 !End of the abilint section
1044 
1045 implicit none
1046 
1047 ! *************************************************************************
1048 
1049 if(allocated(projected_epsilon_M_matrix)) then
1050   ABI_DEALLOCATE(projected_epsilon_M_matrix)
1051 end if
1052 if(allocated(projected_epsilon_B_matrix)) then
1053   ABI_DEALLOCATE(projected_epsilon_B_matrix)
1054 end if
1055 if(allocated(projected_epsilon_G_matrix)) then
1056   ABI_DEALLOCATE(projected_epsilon_G_matrix)
1057 end if
1058 if(allocated(eps_m1_minus_eps_model_m1)) then
1059   ABI_DEALLOCATE(eps_m1_minus_eps_model_m1)
1060 end if
1061 if(allocated(list_omega)) then
1062   ABI_DEALLOCATE(list_omega)
1063 end if
1064 if(allocated(list_weights)) then
1065   ABI_DEALLOCATE(list_weights)
1066 end if
1067 if(allocated(eps_model_m1_minus_one_DISTR)) then
1068   ABI_DEALLOCATE(eps_model_m1_minus_one_DISTR)
1069 end if
1070 if(allocated(model_lanczos_vector_belongs_to_this_node)) then
1071   ABI_DEALLOCATE(model_lanczos_vector_belongs_to_this_node)
1072 end if
1073 if(allocated(model_lanczos_vector_index)) then
1074   ABI_DEALLOCATE(model_lanczos_vector_index)
1075 end if
1076 
1077 
1078 end subroutine cleanup_projected_Sternheimer_epsilon

m_hamiltonian/compute_eps_m1_minus_eps_model_m1 [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  compute_eps_m1_minus_eps_model_m1

FUNCTION

  .

INPUTS

OUTPUT

PARENTS

      gwls_ComputeCorrelationEnergy

CHILDREN

      cpu_time,extract_svd,g_to_r,gr_to_g,hpsik,pc_k_valence_kernel
      setup_pk_model,sqmr,sqrt_vc_k,wf_block_distribute
      write_text_block_in_timing_log,write_timing_log,xmpi_sum,zgemm,zgesv

SOURCE

205 subroutine compute_eps_m1_minus_eps_model_m1(lmax, npt_gauss)
206 !----------------------------------------------------------------------------------------------------
207 !
208 ! This subroutine computes the array 
209 !
210 !                eps^{-1}(iw) - eps_model^{-1}(iw), 
211 !
212 ! for all relevant frequencies in the Lanczos basis.
213 !----------------------------------------------------------------------------------------------------
214 
215 !This section has been created automatically by the script Abilint (TD).
216 !Do not modify the following lines by hand.
217 #undef ABI_FUNC
218 #define ABI_FUNC 'compute_eps_m1_minus_eps_model_m1'
219 !End of the abilint section
220 
221 implicit none
222 
223 integer ,     intent(in)  :: lmax, npt_gauss
224 
225 character(256) :: timing_string
226 real(dp)       :: time1, time2
227 real(dp)       :: time
228 
229 integer        :: iw, l
230 complex(dpc),allocatable  :: dummy_matrix(:,:)
231 complex(dpc),allocatable  :: iden(:,:)
232 
233 ! *************************************************************************
234 
235 
236 timing_string = "#"
237 call write_text_block_in_Timing_log(timing_string)
238 timing_string = "#        Computing eps^{-1}(iw) - eps_model^{-1}(iw) "
239 call write_text_block_in_Timing_log(timing_string)
240 timing_string = "#"
241 call write_text_block_in_Timing_log(timing_string)
242 
243 
244 call cpu_time(time1)
245 ! Allocate the module array
246 ABI_ALLOCATE(eps_m1_minus_eps_model_m1, (lmax,lmax,npt_gauss+1))
247 ABI_ALLOCATE(dummy_matrix, (lmax,lmax))
248 ABI_ALLOCATE(iden, (lmax,lmax))
249 
250 
251 iden = cmplx_0
252 
253 do l = 1, lmax
254 iden(l,l) = cmplx_1
255 end do
256 
257 do iw = 1, npt_gauss + 1
258 
259 
260 dummy_matrix(:,:) = projected_dielectric_Lanczos_basis(:,:,iw)
261 
262 call driver_invert_positive_definite_hermitian_matrix(dummy_matrix,lmax)
263 
264 
265 eps_m1_minus_eps_model_m1(:,:,iw) = dummy_matrix(:,:)
266 
267 dummy_matrix(:,:) = model_dielectric_Lanczos_basis(:,:,iw)
268 
269 
270 
271 call driver_invert_positive_definite_hermitian_matrix(dummy_matrix,lmax)
272 
273 eps_m1_minus_eps_model_m1(:,:,iw) = eps_m1_minus_eps_model_m1(:,:,iw) - dummy_matrix(:,:)
274 
275 
276 end do
277 
278 
279 
280 
281 ! Deallocate the arrays which are no longer needed
282 ABI_DEALLOCATE(model_dielectric_Lanczos_basis)
283 ABI_DEALLOCATE(projected_dielectric_Lanczos_basis)
284 
285 
286 
287 ABI_DEALLOCATE(dummy_matrix)
288 ABI_DEALLOCATE(iden)
289 
290 
291 
292 call cpu_time(time2)
293 time = time2-time1
294 
295 timing_string = "#        Total time   :   "
296 call write_timing_log(timing_string,time)
297 
298 
299 
300 
301 end subroutine compute_eps_m1_minus_eps_model_m1

m_hamiltonian/compute_eps_m1_minus_one [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  compute_eps_m1_minus_one

FUNCTION

  .

INPUTS

OUTPUT

PARENTS

      gwls_ComputeCorrelationEnergy

CHILDREN

      cpu_time,extract_svd,g_to_r,gr_to_g,hpsik,pc_k_valence_kernel
      setup_pk_model,sqmr,sqrt_vc_k,wf_block_distribute
      write_text_block_in_timing_log,write_timing_log,xmpi_sum,zgemm,zgesv

SOURCE

325 subroutine compute_eps_m1_minus_one(lmax, npt_gauss)
326 !----------------------------------------------------------------------------------------------------
327 !
328 ! This subroutine computes the array 
329 !
330 !                eps^{-1}(iw) - I
331 !
332 ! for all relevant frequencies in the Lanczos basis.
333 !----------------------------------------------------------------------------------------------------
334 
335 !This section has been created automatically by the script Abilint (TD).
336 !Do not modify the following lines by hand.
337 #undef ABI_FUNC
338 #define ABI_FUNC 'compute_eps_m1_minus_one'
339 !End of the abilint section
340 
341 implicit none
342 
343 integer ,     intent(in)  :: lmax, npt_gauss
344 
345 character(256) :: timing_string
346 real(dp)       :: time1, time2
347 real(dp)       :: time
348 
349 integer        :: iw, l
350 complex(dpc),allocatable  :: dummy_matrix(:,:)
351 complex(dpc),allocatable  :: iden(:,:)
352 
353 ! *************************************************************************
354 
355 
356 
357 timing_string = "#"
358 call write_text_block_in_Timing_log(timing_string)
359 timing_string = "#        Computing eps^{-1}(iw) - I "
360 call write_text_block_in_Timing_log(timing_string)
361 timing_string = "#"
362 call write_text_block_in_Timing_log(timing_string)
363 
364 call cpu_time(time1)
365 ! Allocate the module array
366 
367 ! The array eps_m1_minus_eps_model_m1 will be used to store
368 ! eps^{-1}-1; we can think of eps_model = I in this case.
369 ABI_ALLOCATE(eps_m1_minus_eps_model_m1, (lmax,lmax,npt_gauss+1))
370 
371 ABI_ALLOCATE(dummy_matrix, (lmax,lmax))
372 ABI_ALLOCATE(iden, (lmax,lmax))
373 
374 iden = cmplx_0
375 
376 do l = 1, lmax
377 iden(l,l) = cmplx_1
378 end do
379 
380 
381 do iw = 1, npt_gauss + 1
382 
383 dummy_matrix(:,:) = projected_dielectric_Lanczos_basis(:,:,iw)
384 
385 call driver_invert_positive_definite_hermitian_matrix(dummy_matrix,lmax)
386 
387 eps_m1_minus_eps_model_m1(:,:,iw) = dummy_matrix(:,:)-iden(:,:)
388 
389 end do
390 
391 
392 ! Deallocate the arrays which are no longer needed
393 ABI_DEALLOCATE(projected_dielectric_Lanczos_basis)
394 
395 ABI_DEALLOCATE(dummy_matrix)
396 ABI_DEALLOCATE(iden)
397 
398 call cpu_time(time2)
399 time = time2-time1
400 
401 timing_string = "#        Total time   :   "
402 call write_timing_log(timing_string,time)
403 
404 end subroutine compute_eps_m1_minus_one

m_hamiltonian/compute_eps_model_m1_minus_one [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  compute_eps_model_m1_minus_one

FUNCTION

  .

INPUTS

OUTPUT

PARENTS

      gwls_ComputeCorrelationEnergy

CHILDREN

      cpu_time,extract_svd,g_to_r,gr_to_g,hpsik,pc_k_valence_kernel
      setup_pk_model,sqmr,sqrt_vc_k,wf_block_distribute
      write_text_block_in_timing_log,write_timing_log,xmpi_sum,zgemm,zgesv

SOURCE

 428 subroutine compute_eps_model_m1_minus_one(lmax_model, npt_gauss, second_model_parameter, epsilon_model_eigenvalues_0)
 429 !----------------------------------------------------------------------------------------------------
 430 !
 431 ! This subroutine computes the array 
 432 !
 433 !                eps_model^{-1}(iw) - 1
 434 !
 435 ! for all relevant frequencies in the model Lanczos basis.
 436 !
 437 ! This array can potentially get very large, as the complementary basis gets big to achieve 
 438 ! convergence. Correspondingly, it makes sense to DISTRIBUTE this array on all processors.
 439 !
 440 ! The algorithm will go as follows:
 441 !
 442 !               eps_m = 1 - V . P . V, V = sqrt{vc}
 443 !
 444 !       I ) compute VPV, storing blocks on different processors:
 445 !
 446 !               VPV = [ ------- --------      ]  = VPV[lc, nB, nW]
 447 !                  |  [| block |  block |     ]
 448 !                 lc  [|   1   |    2   | ... ]
 449 !                  |  [|       |        |     ]
 450 !                  |  [|       |        |     ]
 451 !                  |  [ ------- ---------     ]
 452 !                        bsize
 453 !
 454 !               This construction *does* involve a fair bit of communications, but it takes
 455 !               a lot less RAM!
 456 !
 457 !       II) once VPV is constructed, do, one frequency at a time:
 458 !               - import all blocks to the HEAD processor
 459 !               - compute eps_m = 1- VPV
 460 !               - invert eps_m^{-1}
 461 !               - subtract identity eps_m^{-1} - 1
 462 !               - redistribute, block by block
 463 !        
 464 !               Doing this frequency by frequency will reduce the RAM weight on the head node.
 465 !
 466 !
 467 !----------------------------------------------------------------------------------------------------
 468 
 469 !This section has been created automatically by the script Abilint (TD).
 470 !Do not modify the following lines by hand.
 471 #undef ABI_FUNC
 472 #define ABI_FUNC 'compute_eps_model_m1_minus_one'
 473  use interfaces_18_timing
 474 !End of the abilint section
 475 
 476 implicit none
 477 
 478 integer,  intent(in) :: lmax_model, npt_gauss
 479 real(dp), intent(in) :: second_model_parameter
 480 real(dp), intent(in) :: epsilon_model_eigenvalues_0(lmax_model)
 481 
 482 integer  :: l, l1, l2
 483 integer  :: iw 
 484 integer  :: v
 485 integer  :: ierr
 486 
 487 real(dp),    allocatable :: psikg_valence(:,:)
 488 real(dp),    allocatable :: psir_valence(:,:,:,:)
 489 
 490 
 491 real(dp),    allocatable :: psik_wrk(:,:)
 492 real(dp),    allocatable :: psikb_wrk(:,:)
 493 real(dp),    allocatable :: psikg_wrk(:,:)
 494 real(dp),    allocatable :: psikg_tmp(:,:)
 495 
 496 complex(dpc),allocatable :: local_Lbasis_conjugated(:,:)
 497 
 498 
 499 complex(dpc),allocatable :: VPV(:,:,:)
 500 real(dp),    allocatable :: re_buffer(:,:), im_buffer(:,:) 
 501 
 502 complex(dpc),allocatable :: vpv_row(:)
 503 
 504 
 505 complex(dpc),allocatable :: epsilon_head(:,:)
 506 
 507 real(dp), allocatable :: re_BUFFER_head(:,:)
 508 real(dp), allocatable :: im_BUFFER_head(:,:)
 509 
 510 
 511 
 512 complex(dpc),allocatable :: YL(:)
 513 
 514 character(256) :: timing_string
 515 real(dp)       :: time1, time2, time
 516 real(dp)       :: fft_time1, fft_time2, fft_time
 517 real(dp)       :: prod_time1, prod_time2, prod_time
 518 
 519 complex(dpc)   :: z
 520 
 521 
 522 integer   :: iblk_lanczos, nbdblock_lanczos
 523 integer   :: mb
 524 integer   :: lb
 525 integer   :: ik
 526 
 527 integer   :: mpi_communicator
 528 integer   :: mpi_nproc
 529 integer   :: mpi_rank
 530 integer   :: mpi_head_rank
 531 
 532 
 533 integer,allocatable   :: sendcounts(:), displs(:)
 534 
 535 integer   :: sendcount, recvcount
 536 
 537 
 538 logical   :: head
 539 
 540 
 541 ! timing
 542 real(dp) :: tsec(2)
 543 integer :: GWLS_TIMAB, OPTION_TIMAB
 544 
 545 ! *************************************************************************
 546 
 547 
 548 GWLS_TIMAB   = 1541
 549 OPTION_TIMAB = 1
 550 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
 551 
 552 
 553 
 554 timing_string = "#"
 555 call write_text_block_in_Timing_log(timing_string)
 556 timing_string = "#        computing eps_model_m1_minus_one"
 557 call write_text_block_in_Timing_log(timing_string)
 558 timing_string = "#"
 559 call write_text_block_in_Timing_log(timing_string)
 560 
 561 
 562 ! Number of blocks of lanczos vectors (blocksize = npband)
 563 nbdblock_lanczos = lmax_model/blocksize
 564 if (modulo(lmax_model,blocksize) /= 0) nbdblock_lanczos = nbdblock_lanczos + 1
 565 
 566 
 567 ! communicator 
 568 mpi_communicator = mpi_enreg%comm_bandfft
 569 
 570 ! total number of processors in the communicator
 571 mpi_nproc        = xmpi_comm_size(mpi_communicator )
 572 
 573 ! what is the rank of this processor?
 574 mpi_rank         = xmpi_comm_rank(mpi_communicator )
 575 
 576 ! rank of the "head" processor
 577 mpi_head_rank    = 0
 578 
 579 
 580 ! number of blocks in the model dielectric matrix, which is equal to the number of processors
 581 nbdblock_epsilon = mpi_nproc
 582 
 583 
 584 ! number of vectors in every block
 585 blocksize_epsilon =  lmax_model/mpi_nproc
 586 if (modulo(lmax_model,mpi_nproc) /= 0) blocksize_epsilon = blocksize_epsilon + 1
 587 
 588 ! attribute blocks to every nodes, and tabulate ownership in logical array
 589 ! This is not *the most efficient* implementation possible, but it is convenient
 590 ABI_ALLOCATE( model_lanczos_vector_belongs_to_this_node, (lmax_model))
 591 ABI_ALLOCATE( model_lanczos_vector_index, (lmax_model))
 592 
 593 
 594 model_lanczos_vector_index = 0
 595 model_lanczos_vector_belongs_to_this_node = .false.
 596 
 597 do l =1, lmax_model
 598 if (mpi_rank == (l-1)/blocksize_epsilon) then
 599   model_lanczos_vector_belongs_to_this_node(l) = .true.
 600   model_lanczos_vector_index(l) = l-mpi_rank*blocksize_epsilon
 601 end if
 602 end do
 603 
 604 !write(100+mpi_rank,*) "model_lanczos_vector_belongs_to_this_node = ",model_lanczos_vector_belongs_to_this_node(:)
 605 !write(100+mpi_rank,*) "model_lanczos_vector_index                = ",model_lanczos_vector_index(:)                
 606 !flush(100+mpi_rank)
 607 
 608 ! Prepare the array that will contain the matrix elements of the model operator
 609 ABI_ALLOCATE(VPV, (lmax_model,blocksize_epsilon,npt_gauss+1))
 610 
 611 VPV(:,:,:) = cmplx_0
 612 
 613 ABI_ALLOCATE(vpv_row, (lmax_model))
 614 
 615 
 616 ! various working arrays
 617 GWLS_TIMAB   = 1542
 618 OPTION_TIMAB = 1
 619 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
 620 
 621 ABI_ALLOCATE(psikg_valence,(2,npw_g))
 622 ABI_ALLOCATE(psir_valence ,(2,n4,n5,n6))
 623 
 624 
 625 ABI_ALLOCATE(psik_wrk,  (2,npw_k))
 626 ABI_ALLOCATE(psikb_wrk, (2,npw_kb))
 627 ABI_ALLOCATE(psikg_wrk, (2,npw_g))
 628 ABI_ALLOCATE(psikg_tmp, (2,npw_g))
 629 
 630 ABI_ALLOCATE(local_Lbasis_conjugated,(npw_k,lmax_model))
 631 ABI_ALLOCATE(YL,(npw_k))
 632 
 633 OPTION_TIMAB = 2
 634 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
 635 
 636 
 637 
 638 fft_time  = zero
 639 prod_time = zero
 640 
 641 
 642 call cpu_time(time1)
 643 ! loop on all valence bands
 644 
 645 do v = 1, nbandv
 646 
 647 
 648 ! copy pre-calculated valence state in this covenient local array
 649 psikg_valence(:,:) = kernel_wavefunctions_FFT(:,:,v)
 650 
 651 ! compute fourier transform of valence state, and conjugate
 652 call g_to_r(psir_valence,psikg_valence) 
 653 psir_valence(2,:,:,:) = -psir_valence(2,:,:,:) 
 654 
 655 !--------------------------------------------------------------------------
 656 !
 657 ! Step 1: build the modified basis Pc . [ (V^{1/2} l) . psi_v^*]. 
 658 !
 659 !--------------------------------------------------------------------------
 660 GWLS_TIMAB   = 1543
 661 OPTION_TIMAB = 1
 662 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
 663 
 664 ! loop on all blocks of lanczos vectors
 665 do iblk_lanczos = 1, nbdblock_lanczos
 666 ! loop on all states within this block
 667 do mb = 1, blocksize
 668 
 669 ! Determine the index of the Lanczos vector
 670 l = (iblk_lanczos-1)*blocksize + mb
 671 
 672 if ( l <= lmax_model) then
 673   psik_wrk(1,:) = dble (Lbasis_model_lanczos(:,l))
 674   psik_wrk(2,:) = dimag(Lbasis_model_lanczos(:,l))
 675 else
 676   psik_wrk(:,:) = zero
 677 end if
 678 
 679 ! apply Coulomb potential
 680 call sqrt_vc_k(psik_wrk)
 681 
 682 ! Store in array of blocks of wavefunctions
 683 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:)
 684 
 685 end do ! mb
 686 
 687 call cpu_time(fft_time1)
 688 
 689 ! Transform to FFT representation
 690 call wf_block_distribute(psikb_wrk,  psikg_wrk,1) ! LA -> FFT 
 691 
 692 
 693 !  generate the vector  Pc [ (sqrt_V_c.l) psi_v^*]
 694 
 695 ! Compute the real space product, and return to k space, in FFT configuration
 696 call gr_to_g(psikg_tmp,psir_valence, psikg_wrk)
 697 
 698 
 699 call cpu_time(fft_time2)
 700 fft_time = fft_time + fft_time2-fft_time1
 701 
 702 ! project
 703 call pc_k_valence_kernel(psikg_tmp)
 704 
 705 ! Return to LA configuration
 706 
 707 ! Transform to FFT representation
 708 call wf_block_distribute(psikb_wrk,  psikg_tmp, 2) ! FFT -> LA
 709 
 710 do mb = 1, blocksize
 711 
 712 ! Determine the index of the Lanczos vector
 713 l = (iblk_lanczos-1)*blocksize + mb
 714 
 715 
 716 if ( l <= lmax_model) then
 717   psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 
 718   local_Lbasis_conjugated(:,l) = cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:)
 719 end if
 720 
 721 
 722 end do ! mb
 723 
 724 end do !iblk_lanczos 
 725 OPTION_TIMAB = 2
 726 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
 727 
 728 
 729 !--------------------------------------------------------------------------
 730 ! Step 2: Now that we have the modified basis, compute the matrix
 731 !             elements of the model dielectric operator
 732 !
 733 !
 734 !--------------------------------------------------------------------------
 735 GWLS_TIMAB   = 1544
 736 OPTION_TIMAB = 1
 737 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
 738 
 739 do iw = 2, npt_gauss+1
 740 
 741 call setup_Pk_model(list_omega(iw),second_model_parameter)
 742 
 743 call cpu_time(prod_time1)
 744 do l1 = 1, lmax_model
 745 
 746 ! Apply core function Y to left-vector; conjugate
 747 do ik = 1, npw_k         
 748 YL(ik) = model_Y_LA(ik)*conjg(local_Lbasis_conjugated(ik,l1))
 749 end do
 750 
 751 ! Only compute lower diagonal part of matrix; epsilon is hermitian conjugate!
 752 vpv_row = cmplx_0
 753 do l2 = 1, l1
 754 do ik = 1, npw_k         
 755 vpv_row(l2) = vpv_row(l2) + YL(ik)*local_Lbasis_conjugated(ik,l2)
 756 end do
 757 end do
 758 
 759 ! the code below is twice as long!
 760 !call ZGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
 761 !call ZGEMV ( 'T', npw_k, lmax_model, cmplx_1, local_Lbasis_conjugated, npw_k, YL, 1, cmplx_0, vpv_row, 1)
 762 
 763 !do l2 = 1, l1
 764 !        eps_model_m1_minus_one(l1,l2,iw) = eps_model_m1_minus_one(l1,l2,iw)     &
 765 !                       -complex_vector_product(YL, local_Lbasis_conjugated(:,l2),npw_k)
 766 !end do
 767 
 768 ! Sum on all processors, making sure all processors have the total vpv_row
 769 call xmpi_sum(vpv_row, mpi_communicator, ierr) ! sum on all processors for LA configuration
 770 
 771 ! Each processor takes its slice!
 772 do l2 =1, l1
 773 if ( model_lanczos_vector_belongs_to_this_node(l2) ) then
 774   lb = model_lanczos_vector_index(l2)
 775   VPV(l1,lb,iw) = VPV(l1,lb,iw) + vpv_row(l2)
 776 end if
 777 end do
 778 
 779 end do
 780 call cpu_time(prod_time2)
 781 
 782 prod_time = prod_time + prod_time2-prod_time1
 783 
 784 end do ! iw
 785 OPTION_TIMAB = 2
 786 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
 787 
 788 end do ! v
 789 
 790 call cpu_time(time2)
 791 
 792 time = time2-time1
 793 timing_string = "#        computing VPV                          : "
 794 call write_timing_log(timing_string,time)
 795 
 796 timing_string = "#                --- of which is FFT transforms : "
 797 call write_timing_log(timing_string,fft_time)
 798 
 799 timing_string = "#                --- of which is products       : "
 800 call write_timing_log(timing_string,prod_time)
 801 
 802 
 803 
 804 
 805 
 806 GWLS_TIMAB   = 1542
 807 OPTION_TIMAB = 1
 808 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
 809 ABI_DEALLOCATE(local_Lbasis_conjugated)
 810 ABI_DEALLOCATE(YL)
 811 ABI_DEALLOCATE(psikg_valence)
 812 ABI_DEALLOCATE(psir_valence)
 813 ABI_DEALLOCATE(psik_wrk)
 814 ABI_DEALLOCATE(psikb_wrk)
 815 ABI_DEALLOCATE(psikg_wrk)
 816 ABI_DEALLOCATE(psikg_tmp)
 817 ABI_DEALLOCATE(vpv_row)
 818 
 819 OPTION_TIMAB = 2
 820 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
 821 
 822 
 823 
 824 call cpu_time(time1)
 825 GWLS_TIMAB   = 1547
 826 OPTION_TIMAB = 1
 827 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
 828 
 829 
 830 !--------------------------------------------------------------------------------
 831 !
 832 !
 833 ! Gather dielectric matrix on head node, invert, and re-distribute. This
 834 ! Saves a lot of RAM, without needing the full machinery of ScaLAPACK.
 835 !
 836 !--------------------------------------------------------------------------------
 837 
 838 
 839 ABI_ALLOCATE(eps_model_m1_minus_one_DISTR, (lmax_model,blocksize_epsilon,npt_gauss+1))
 840 ABI_ALLOCATE(re_buffer, (lmax_model,blocksize_epsilon))
 841 ABI_ALLOCATE(im_buffer, (lmax_model,blocksize_epsilon))
 842 
 843 eps_model_m1_minus_one_DISTR(:,:,:) = cmplx_0
 844 
 845 ! Define the head node, which will invert the dielectric matrices
 846 head = mpi_rank == mpi_head_rank
 847 
 848 ! Amount of data received and sent
 849 sendcount = lmax_model*blocksize_epsilon
 850 recvcount = lmax_model*blocksize_epsilon
 851 
 852 ABI_ALLOCATE(sendcounts,(mpi_nproc))
 853 ABI_ALLOCATE(displs    ,(mpi_nproc))
 854 sendcounts(:) = sendcount 
 855 
 856 do l =1, mpi_nproc
 857 displs(l) = (l-1)*sendcount
 858 end do
 859 
 860 if (head) then
 861   ! build and invert the dielectric array
 862   ! careful!  lmax_model not necessarily equal to blocksize_epsilon*nbdblock_epsilon
 863   ABI_ALLOCATE(re_BUFFER_head, (lmax_model, blocksize_epsilon*nbdblock_epsilon))
 864   ABI_ALLOCATE(im_BUFFER_head, (lmax_model, blocksize_epsilon*nbdblock_epsilon))
 865   ABI_ALLOCATE(epsilon_head, (lmax_model, lmax_model))
 866 else
 867   !This looks superfluous and it is on large number of systems, but sending these 
 868   !unallocated in xmpi_scatterv caused 'cannot allocate memory' cryptic errors on 
 869   !several parallel test farm computers (cronos_gcc46_paral, petrus_nag, inca_gcc44_sdebug)
 870   ABI_ALLOCATE(re_BUFFER_head, (1,1))
 871   ABI_ALLOCATE(im_BUFFER_head, (1,1))
 872   ABI_ALLOCATE(epsilon_head, (1,1))
 873 end if
 874 
 875 ! Do  one frequency at a time, to avoid overflowing the RAM
 876 do iw = 1, npt_gauss+1
 877 ! Gather, except for static case
 878 if ( iw /=1 ) then
 879   ! Gather VPV on head node, for this frequency
 880   call xmpi_gather(dble(VPV(:,:,iw)),  sendcount , re_BUFFER_head, recvcount, mpi_head_rank, mpi_communicator,ierr)
 881   call xmpi_gather(dimag(VPV(:,:,iw)), sendcount , im_BUFFER_head, recvcount, mpi_head_rank, mpi_communicator,ierr)
 882 end if
 883 
 884 if ( head ) then
 885 
 886   ! fill the dielectric matrix
 887 
 888   epsilon_head(:,:) = cmplx_0
 889   if (iw ==1) then
 890     ! STATIC CASE, diagonal matrix
 891     do l= 1, lmax_model
 892     epsilon_head(l,l) = cmplx_1/epsilon_model_eigenvalues_0(l)-cmplx_1
 893     end do
 894 
 895   else
 896     ! DYNAMIC CASE, compute
 897     do l1 =1, lmax_model
 898     do l2 =1, l1
 899     z = -cmplx_1*re_BUFFER_head(l1,l2)-cmplx_i*im_BUFFER_head(l1,l2)
 900     epsilon_head(l1,l2) = z
 901     epsilon_head(l2,l1) = conjg(z)
 902     end do
 903     epsilon_head(l1,l1) = epsilon_head(l1,l1) + cmplx_1
 904     end do
 905 
 906     ! invert the matrix
 907     call driver_invert_positive_definite_hermitian_matrix(epsilon_head,lmax_model)
 908 
 909     ! subtract identity
 910     do l =1, lmax_model
 911     epsilon_head(l,l) = epsilon_head(l,l) - cmplx_1
 912     end do 
 913   end if
 914 
 915   ! copy in head buffer
 916   re_BUFFER_head(:,:) = zero
 917   im_BUFFER_head(:,:) = zero
 918   do l1 =1, lmax_model
 919   do l2 =1, lmax_model
 920   z = epsilon_head(l1,l2)
 921   re_BUFFER_head(l1,l2) = dble(z) 
 922   im_BUFFER_head(l1,l2) = dimag(z) 
 923   end do 
 924   end do 
 925 
 926 end if 
 927 
 928 ! Scatter back the data on the head to all processors
 929 call xmpi_scatterv(re_BUFFER_head, sendcounts, displs, re_buffer, recvcount, mpi_head_rank, mpi_communicator, ierr)
 930 call xmpi_scatterv(im_BUFFER_head, sendcounts, displs, im_buffer, recvcount, mpi_head_rank, mpi_communicator, ierr)
 931 
 932 eps_model_m1_minus_one_DISTR(:,:,iw) = cmplx_1*re_buffer(:,:) + cmplx_i*im_buffer(:,:)
 933 
 934 end do
 935 
 936 ABI_DEALLOCATE(re_BUFFER_head)
 937 ABI_DEALLOCATE(im_BUFFER_head)
 938 ABI_DEALLOCATE(epsilon_head)
 939 
 940 
 941 
 942 !================================================================================
 943 !
 944 ! For debugging purposes, store distributed dielectric matrix back in the 
 945 ! complete local copies, to insure the rest of the code works.
 946 !
 947 !================================================================================
 948 
 949 if (.false.) then
 950   ! Prepare the array that will contain the matrix elements of the model operator
 951   ! THIS IS ONLY FOR THE REST OF THE CODE TO WORK; WE WILL REMOVE THIS 
 952   ! TO SAVE RAM LATER
 953   ABI_ALLOCATE(eps_model_m1_minus_one, (lmax_model,lmax_model,npt_gauss+1))
 954 
 955   ! initialize the array with zeros
 956   eps_model_m1_minus_one = cmplx_0
 957 
 958   ! Amount of data received and sent
 959   sendcount = lmax_model*blocksize_epsilon
 960   recvcount = lmax_model*blocksize_epsilon*nbdblock_epsilon
 961 
 962 
 963   ABI_ALLOCATE(re_BUFFER_head, (lmax_model,blocksize_epsilon*nbdblock_epsilon))
 964   ABI_ALLOCATE(im_BUFFER_head, (lmax_model,blocksize_epsilon*nbdblock_epsilon))
 965 
 966   do iw = 1, npt_gauss+1
 967 
 968   re_buffer(:,:) = dble( eps_model_m1_minus_one_DISTR(:,:,iw))
 969   im_buffer(:,:) = dimag(eps_model_m1_minus_one_DISTR(:,:,iw))
 970 
 971   call xmpi_allgather(re_buffer,sendcount,re_BUFFER_head,mpi_communicator,ierr)
 972   call xmpi_allgather(im_buffer,sendcount,im_BUFFER_head,mpi_communicator,ierr)
 973 
 974   do l = 1, lmax_model
 975   eps_model_m1_minus_one(:,l,iw) =  cmplx_1*re_BUFFER_head(:,l)+ cmplx_i*im_BUFFER_head(:,l)
 976   end do
 977   end do
 978 
 979   ABI_DEALLOCATE(re_BUFFER_head)
 980   ABI_DEALLOCATE(im_BUFFER_head)
 981 end if
 982 
 983 call cpu_time(time2)
 984 OPTION_TIMAB = 2
 985 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
 986 
 987 !================================================================================
 988 !
 989 !================================================================================
 990 
 991 
 992 time = time2-time1
 993 timing_string = "#        inverting / distributing               :  "
 994 
 995 call write_timing_log(timing_string,time)
 996 
 997 
 998 
 999 ABI_DEALLOCATE(re_buffer )
1000 ABI_DEALLOCATE(im_buffer )
1001 ABI_DEALLOCATE(sendcounts)
1002 ABI_DEALLOCATE(displs    )
1003 ABI_DEALLOCATE(VPV       )
1004 
1005 
1006 
1007 GWLS_TIMAB   = 1541
1008 OPTION_TIMAB = 2
1009 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
1010 
1011 
1012 end subroutine compute_eps_model_m1_minus_one

m_hamiltonian/generate_frequencies_and_weights [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  generate_frequencies_and_weights

FUNCTION

  .

INPUTS

OUTPUT

PARENTS

      gwls_ComputeCorrelationEnergy

CHILDREN

      cpu_time,extract_svd,g_to_r,gr_to_g,hpsik,pc_k_valence_kernel
      setup_pk_model,sqmr,sqrt_vc_k,wf_block_distribute
      write_text_block_in_timing_log,write_timing_log,xmpi_sum,zgemm,zgesv

SOURCE

128 subroutine generate_frequencies_and_weights(npt_gauss)
129 !--------------------------------------------------------------------------------
130 !
131 ! This subroutine computes the frequencies and weights necessary for Gauss-Legendre
132 ! quadrature, and stores the results in module arrays.
133 !
134 !--------------------------------------------------------------------------------
135 
136 !This section has been created automatically by the script Abilint (TD).
137 !Do not modify the following lines by hand.
138 #undef ABI_FUNC
139 #define ABI_FUNC 'generate_frequencies_and_weights'
140 !End of the abilint section
141 
142 implicit none
143 
144 integer, intent(in)  :: npt_gauss
145 
146 
147 real(dp), allocatable ::   list_omega_tmp(:)
148 real(dp), allocatable :: list_weights_tmp(:)
149 
150 integer     :: i
151 
152 ! *************************************************************************
153 
154 ABI_ALLOCATE(list_omega_tmp,   (npt_gauss))
155 ABI_ALLOCATE(list_weights_tmp, (npt_gauss))
156 
157 call get_frequencies_and_weights_legendre(npt_gauss,list_omega_tmp,list_weights_tmp)
158 
159 
160 ABI_ALLOCATE(list_omega,   (npt_gauss+1))
161 ABI_ALLOCATE(list_weights, (npt_gauss+1))
162 
163 ! make sure the first frequency in zero!
164 list_omega(1)   = zero
165 list_weights(1) = zero
166 
167 do i = 1,npt_gauss
168 
169 ! inverse the order of the frequency points, as they come out
170 ! in reverse order from the generating subroutine
171 list_omega  (i+1) = list_omega_tmp  (npt_gauss+1-i)
172 list_weights(i+1) = list_weights_tmp(npt_gauss+1-i)
173 
174 end do
175 
176 
177 ABI_DEALLOCATE(list_omega_tmp)
178 ABI_DEALLOCATE(list_weights_tmp)
179 
180 
181 end subroutine generate_frequencies_and_weights

m_hamiltonian/ProjectedSternheimerEpsilon [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  ProjectedSternheimerEpsilon

FUNCTION

  .

INPUTS

OUTPUT

PARENTS

      gwls_ComputeCorrelationEnergy

CHILDREN

      cpu_time,extract_svd,g_to_r,gr_to_g,hpsik,pc_k_valence_kernel
      setup_pk_model,sqmr,sqrt_vc_k,wf_block_distribute
      write_text_block_in_timing_log,write_timing_log,xmpi_sum,zgemm,zgesv

SOURCE

1103 subroutine ProjectedSternheimerEpsilon(lmax, npt_gauss, second_model_parameter, &
1104 list_projection_frequencies,nfrequencies,&
1105 epsilon_eigenvalues_0,debug,use_model)
1106 !----------------------------------------------------------------------------------------------------
1107 ! This subroutine combines in a single subprogram the jobs of previous routines
1108 !
1109 !               - setup_projected_Sternheimer_epsilon
1110 !               - compute_projected_Sternheimer_epsilon
1111 !
1112 ! The purpose of this combination is to avoid independent loops on nbandv, requiring the 
1113 ! arrays
1114 !               projected_epsilon_M_matrix
1115 !               projected_epsilon_B_matrix
1116 !               projected_epsilon_G_matrix
1117 !
1118 ! from scaling like N^3, which grows very large with problem size.
1119 ! 
1120 ! Thus, this routine:
1121 !
1122 !       -  Computes the frequency-dependent dielectric matrix in the Lanczos basis, using
1123 !          the projected Sternheimer equations.
1124 !
1125 !       -  Computes the frequency-dependent MODEL dielectric matrix in the complementary Lanczos basis.
1126 !
1127 !
1128 ! This routine will be verbose and write log files; indeed, large jobs crash in here, it will
1129 ! be important to know where/why!
1130 !
1131 ! The subroutine also computes the matrix elements on epsilon_model(iw) in the Lanczos basis;
1132 ! this is done here to avoid preforming direct products with the valence states again later.
1133 !----------------------------------------------------------------------------------------------------
1134 
1135 !This section has been created automatically by the script Abilint (TD).
1136 !Do not modify the following lines by hand.
1137 #undef ABI_FUNC
1138 #define ABI_FUNC 'ProjectedSternheimerEpsilon'
1139 !End of the abilint section
1140 
1141 implicit none
1142 
1143 real(dp), parameter     :: svd_tolerance = 1.0e-16_dp
1144 
1145 integer,     intent(in) :: lmax, npt_gauss
1146 integer,     intent(in) :: nfrequencies
1147 real(dp),    intent(in) :: list_projection_frequencies(nfrequencies)
1148 logical,     intent(in) :: debug
1149 real(dp),    intent(in) :: epsilon_eigenvalues_0(lmax)
1150 
1151 logical,optional,intent(in) :: use_model
1152 
1153 real(dp),    intent(in) :: second_model_parameter
1154 
1155 
1156 integer :: l, l1, l2
1157 integer :: i, iw, v
1158 integer :: recy_i
1159 integer :: lsolutions_max, lsolutions, ls
1160 integer :: projection
1161 
1162 complex(dpc), allocatable :: sternheimer_A0(:,:)
1163 complex(dpc), allocatable :: sternheimer_A(:,:)
1164 complex(dpc), allocatable :: sternheimer_B(:,:)
1165 complex(dpc), allocatable :: sternheimer_X(:,:)
1166 complex(dpc), allocatable :: sternheimer_G(:,:)
1167 
1168 
1169 complex(dpc), allocatable :: dummy_tmp_1(:,:)
1170 complex(dpc), allocatable :: dummy_tmp_2(:,:)
1171 
1172 integer, allocatable      :: ipiv(:)
1173 
1174 
1175 
1176 complex(dpc),allocatable :: local_Lbasis(:,:)
1177 complex(dpc),allocatable :: local_Lbasis_conjugated(:,:)
1178 
1179 complex(dpc),allocatable :: YL(:)
1180 
1181 real(dp), allocatable :: psikg_in(:,:), psikg_out(:,:)
1182 
1183 real(dp), allocatable :: psik_wrk(:,:), psikg_wrk(:,:), psikb_wrk(:,:)
1184 real(dp), allocatable :: psi_gamma_l1(:,:), psi_gamma_l2(:,:)
1185 
1186 real(dp), allocatable :: psikg_valence(:,:)
1187 real(dp), allocatable :: psir_valence(:,:,:,:)
1188 
1189 real(dp), allocatable :: psi_rhs(:,:,:)
1190 
1191 real(dp), allocatable :: psikg_VL(:,:)
1192 
1193 
1194 complex(dpc), allocatable :: check_matrix(:,:), check_matrix2(:,:)
1195 complex(dpc), allocatable :: c_sternheimer_solutions(:,:)
1196 complex(dpc), allocatable :: QR_orthonormal_basis(:,:)
1197 
1198 complex(dpc), allocatable :: svd_matrix(:,:)
1199 real   (dp ), allocatable :: svd_values(:)
1200 
1201 
1202 integer                   :: iblk_lanczos, nbdblock_lanczos
1203 integer                   :: iblk_solutions, nbdblock_solutions
1204 integer                   :: mb
1205 
1206 character(128) :: filename
1207 logical        :: file_exists
1208 integer        :: io_unit
1209 
1210 character(128) :: filename_log
1211 integer        :: io_unit_log
1212 
1213 
1214 real(dp)      :: omega
1215 
1216 character(256) :: timing_string
1217 real(dp)       :: time1, time2
1218 real(dp)       :: time_exact
1219 
1220 
1221 integer  :: info
1222 integer  :: ierr
1223 
1224 real(dp)  ::  z(2)
1225 
1226 
1227 logical        :: omega_is_imaginary
1228 real(dp)       :: omega0
1229 
1230 logical        :: model
1231 logical        :: write_debug
1232 
1233 
1234 integer        :: mpi_communicator, mpi_rank, mpi_group
1235 
1236 ! *************************************************************************
1237 
1238 
1239 !================================================================================
1240 ! Prepare MPI information
1241 !================================================================================
1242 
1243 ! for LA configuration ,The processors communicate over band+FFT
1244 mpi_communicator = mpi_enreg%comm_bandfft
1245 
1246 ! what is the rank of this processor, within its group?
1247 mpi_rank  = mpi_enreg%me_fft
1248 
1249 ! Which group does this processor belong to, given the communicator?
1250 mpi_group = mpi_enreg%me_band
1251 
1252 
1253 
1254 
1255 !================================================================================
1256 ! Setup a log file, to keep track of the algorithm
1257 !================================================================================
1258 
1259 
1260 write(filename_log,'(A,I4.4,A)') 'ProjectedSternheimerEpsilon_PROC=',mpi_enreg%me,'.log'
1261 
1262 io_unit_log = get_unit()
1263 open(io_unit_log,file=filename_log,status=files_status_new)
1264 write(io_unit_log,10) ''
1265 write(io_unit_log,10) '#===================================================================================================='
1266 write(io_unit_log,10) "#                     ProjectedSternheimerEpsilon: log file                                          "
1267 write(io_unit_log,10) "#                     -------------------------------------------------------------------            "
1268 write(io_unit_log,10) "#                                                                                                    "
1269 write(io_unit_log,10) "# This file tracks the algorithm in the routine ProjectedSternheimerEpsilon. The goal is to          " 
1270 write(io_unit_log,10) "# establish where the algorithm crashes if it does, and/or  to track are far along the code is.      "
1271 write(io_unit_log,10) '#'
1272 write(io_unit_log,10) '#  MPI data for this process:'
1273 write(io_unit_log,10) '#'
1274 write(io_unit_log,22) '#    mpi_rank :',mpi_rank,'  (rank of this processor in its band group)'
1275 write(io_unit_log,22) '#    mpi_group:',mpi_group,' (band group to which this processor belongs)'
1276 write(io_unit_log,10) '#===================================================================================================='
1277 flush(io_unit_log)
1278 
1279 
1280 !================================================================================
1281 ! Setup timing; prepare arrays
1282 !================================================================================
1283 
1284 write(io_unit_log,10) " - Preparing and allocating arrays ...."
1285 flush(io_unit_log)
1286 
1287 
1288 
1289 timing_string = "#"
1290 call write_text_block_in_Timing_log(timing_string)
1291 timing_string = "#        ProjectedSternheimerEpsilon "
1292 call write_text_block_in_Timing_log(timing_string)
1293 timing_string = "#"
1294 call write_text_block_in_Timing_log(timing_string)
1295 
1296 ! Allocate the module array
1297 ABI_ALLOCATE(projected_dielectric_Lanczos_basis, (lmax,lmax,npt_gauss+1))
1298 
1299 projected_dielectric_Lanczos_basis(:,:,:) = cmplx_0
1300 
1301 ! initialize zero frequency with exact solution
1302 do l = 1, lmax
1303 projected_dielectric_Lanczos_basis(l,l,1) = cmplx_1*epsilon_eigenvalues_0(l)
1304 end do
1305 
1306 
1307 ! initialize other frequencies with the identity
1308 do iw = 2, npt_gauss + 1
1309 do l = 1, lmax
1310 projected_dielectric_Lanczos_basis(l,l,iw) = cmplx_1
1311 end do
1312 end do
1313 
1314 time_exact = zero
1315 
1316 
1317 !================================================================================
1318 ! Parallelisation of the code is subtle; Hamiltonian must act over 
1319 ! FFT rows, we must be careful with memory, etc...
1320 !
1321 ! We will parallelise in block of Lanczos vectors, not over bands.
1322 !================================================================================
1323 
1324 ! Number of blocks of lanczos vectors
1325 nbdblock_lanczos = lmax/blocksize
1326 if (modulo(lmax,blocksize) /= 0) nbdblock_lanczos = nbdblock_lanczos + 1
1327 
1328 
1329 if (present(use_model)) then
1330   model = use_model
1331 else
1332   model = .true.
1333 end if
1334 
1335 if (model) then
1336   ! Prepare the array that will contain the matrix elements of the model operator
1337   ABI_ALLOCATE(model_dielectric_Lanczos_basis, (lmax,lmax,npt_gauss+1))
1338 
1339   ! initialize with zero. NOT with the identity, in order to avoid extra communications
1340   ! (see below)
1341   model_dielectric_Lanczos_basis(:,:,:) = cmplx_0
1342 
1343 end if
1344 
1345 ! various working arrays
1346 
1347 ABI_ALLOCATE(psikg_valence    ,(2,npw_g))
1348 ABI_ALLOCATE(psir_valence     ,(2,n4,n5,n6))
1349 
1350 
1351 ABI_ALLOCATE(psikg_VL ,(2,npw_g))
1352 
1353 
1354 ABI_ALLOCATE(psik_wrk         ,(2,npw_k))
1355 ABI_ALLOCATE(psikb_wrk        ,(2,npw_kb))
1356 ABI_ALLOCATE(psikg_wrk        ,(2,npw_g))
1357 
1358 
1359 ABI_ALLOCATE(psi_gamma_l1     ,(2,npw_k))
1360 ABI_ALLOCATE(psi_gamma_l2     ,(2,npw_k))
1361 
1362 ABI_ALLOCATE(psi_rhs          ,(2,npw_k,lmax))
1363 
1364 
1365 ABI_ALLOCATE(psikg_in   ,(2,npw_g))
1366 ABI_ALLOCATE(psikg_out  ,(2,npw_g))
1367 
1368 
1369 ! maximal possible dimension of the solution space
1370 ! +1 because the solutions at $\omega=\infty$ are free.
1371 ! +1 if recycling is activated, because the solutions at $\omega=0$ are then available.
1372 i=1
1373 if(dtset%gwls_recycle == 1 .or. dtset%gwls_recycle == 2) then
1374   i=2
1375 end if
1376 lsolutions_max = lmax*(nfrequencies+i)
1377 
1378 
1379 ABI_ALLOCATE(local_Lbasis,           (npw_k,lmax))
1380 ABI_ALLOCATE(local_Lbasis_conjugated,(npw_k,lmax))
1381 ABI_ALLOCATE(YL,(npw_k))
1382 
1383 ABI_ALLOCATE(c_sternheimer_solutions,(npw_k,lsolutions_max))
1384 ABI_ALLOCATE(QR_orthonormal_basis   ,(npw_k,lsolutions_max))
1385 
1386 
1387 omega_is_imaginary = .true.
1388 
1389 
1390 ABI_ALLOCATE(svd_matrix,(npw_k,lsolutions_max))
1391 ABI_ALLOCATE(svd_values,(lsolutions_max))
1392 
1393 
1394 ! Prepare files for writing
1395 write_debug = debug .and. mpi_enreg%me == 0
1396 
1397 if ( write_debug ) then
1398 
1399   write(filename,'(A)') "ProjectedSternheimerEpsilon.log"
1400   inquire(file=filename,exist=file_exists)
1401 
1402   i = 0
1403   do while (file_exists)
1404   i = i+1
1405   write (filename,'(A,I0.4,A)') "ProjectedSternheimerEpsilon_",i,".log"
1406   inquire(file=filename,exist=file_exists)
1407   end do
1408 
1409 
1410   io_unit = get_unit()
1411   open(io_unit,file=filename,status=files_status_new)
1412   write(io_unit,10) ''
1413   write(io_unit,10) '#===================================================================================================='
1414   write(io_unit,10) "#                     Building the dielectic matrix using projected Sternheimer equation             "
1415   write(io_unit,10) "#                     -------------------------------------------------------------------            "
1416   write(io_unit,10) "#                                                                                                    "
1417   write(io_unit,10) '# This file contains some tests to check if the various elements entering the projected  '
1418   write(io_unit,10) '# dielectric matrix have the right properties. At this point, this is mostly for debugging.'
1419   write(io_unit,10) '# The wavefunctions and other related arrays are stored in reciprocal space.'
1420   write(io_unit,10) '#'
1421   write(io_unit,10) '#===================================================================================================='
1422   write(io_unit,10) ''
1423   flush(io_unit)
1424 end if
1425 
1426 if (debug) then
1427   ABI_ALLOCATE(check_matrix ,(lsolutions_max,lsolutions_max))
1428   ABI_ALLOCATE(check_matrix2,(lsolutions_max,lsolutions_max))
1429 end if
1430 
1431 !================================================================================
1432 !  Loop on all valence bands
1433 !================================================================================
1434 
1435 
1436 
1437 
1438 do v = 1, nbandv
1439 write(io_unit_log,10) '#===================================================================================================='
1440 write(io_unit_log,20) '#  valence band index:', v
1441 write(io_unit_log,10) '#===================================================================================================='
1442 flush(io_unit_log)
1443 
1444 
1445 
1446 
1447 if ( write_debug ) then
1448   write(io_unit,10) '#===================================================================================================='
1449   write(io_unit,20) '#  valence band index:', v
1450   write(io_unit,10) '#===================================================================================================='
1451   flush(io_unit)
1452 end if
1453 
1454 write(io_unit_log,10) '   - Fourier transform valence state ...'
1455 flush(io_unit_log)
1456 
1457 
1458 ! copy pre-calculated valence state in this covenient local array
1459 psikg_valence(:,:) = kernel_wavefunctions_FFT(:,:,v)
1460 
1461 ! compute fourier transform of valence state, and conjugate
1462 call g_to_r(psir_valence,psikg_valence) 
1463 psir_valence(2,:,:,:) = -psir_valence(2,:,:,:) 
1464 
1465 ! loop on all blocks of lanczos vectors
1466 write(io_unit_log,10) '   - Loop on all lanczos blocks to generate modified basis and Sternheimer RHS:'
1467 flush(io_unit_log)
1468 do iblk_lanczos = 1, nbdblock_lanczos
1469 !--------------------------------------------------------------------------
1470 ! Below, we build the modified basis, [ (V^{1/2} l)^* . psi_v], 
1471 ! as well as conjugated, projected form, Pc . [ (V^{1/2} l) . psi_v^*]. 
1472 !
1473 ! It is very irritating to have to do it this way, but I don't
1474 ! see an alternative; see discussion below.
1475 !
1476 !--------------------------------------------------------------------------
1477 
1478 write(io_unit_log,23) '       iblk_lanczos  = ',iblk_lanczos," / ",nbdblock_lanczos
1479 
1480 
1481 write(io_unit_log,10) '          -- Prepare modified basis computation...'
1482 flush(io_unit_log)
1483 
1484 
1485 
1486 ! loop on all states within this block
1487 do mb = 1, blocksize
1488 
1489 ! Determine the index of the Lanczos vector
1490 l = (iblk_lanczos-1)*blocksize + mb
1491 
1492 ! take a single lanczos vector
1493 
1494 if ( l <= lmax) then
1495   psik_wrk(1,:) = dble (Lbasis_lanczos(:,l))
1496   psik_wrk(2,:) = dimag(Lbasis_lanczos(:,l))
1497 else
1498   psik_wrk(:,:) =  zero
1499 end if
1500 
1501 
1502 ! Apply coulomb potential
1503 call sqrt_vc_k(psik_wrk)
1504 
1505 ! Store in array of blocks of wavefunctions
1506 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:)
1507 
1508 end do ! mb
1509 
1510 ! Transform to FFT representation
1511 call wf_block_distribute(psikb_wrk,  psikg_VL,1) ! LA -> FFT 
1512 
1513 ! psikg_VL now contains | V^1/2 . l >, in FFT configuration
1514 
1515 
1516 !----------------------------------------------------------
1517 ! i) Compute the modified basis
1518 !----------------------------------------------------------
1519 write(io_unit_log,10) '          -- compute modified basis ...'
1520 flush(io_unit_log)
1521 
1522 
1523 
1524 ! Fourier transform to real space, and conjugate (psir1 is a global work array)
1525 call g_to_r(psir1,psikg_VL) 
1526 psir1(2,:,:,:) = -psir1(2,:,:,:) ! IS THIS STACK-DANGEROUS?
1527 
1528 ! Compute the real space product, and return to k space, in FFT configuration
1529 call gr_to_g(psikg_wrk,psir1,psikg_valence)
1530 
1531 ! psikg_wrk contains | (V^1/2 . l)^* phi_v >, in FFT configuration
1532 
1533 ! return to LA representation
1534 call wf_block_distribute(psikb_wrk,  psikg_wrk,2) ! FFT -> LA 
1535 
1536 ! store data, in LA representation
1537 do mb = 1, blocksize
1538 l = (iblk_lanczos-1)*blocksize + mb
1539 
1540 if ( l <= lmax) then
1541   ! local_Lbasis
1542   psik_wrk(:,:)     = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k)
1543   local_Lbasis(:,l) = cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:)
1544 end if
1545 
1546 end do !mb 
1547 
1548 !--------------------------------------------------------------------------
1549 ! ii) Build the Sternheimer coefficients, at various frequencies,
1550 !     to define the Sternheimer basis.
1551 !--------------------------------------------------------------------------
1552 write(io_unit_log,20) '          -- Compute Sternheimer RHS...'
1553 flush(io_unit_log)
1554 
1555 
1556 ! psikg_wrk still contains | (V^1/2 . l)^* phi_v >, in FFT configuration
1557 
1558 !  Create right-hand-side of Sternheimer equation, in FFT configuration
1559 call pc_k_valence_kernel(psikg_wrk)
1560 call Hpsik(psikg_in,psikg_wrk,eig(v))
1561 call pc_k_valence_kernel(psikg_in)
1562 psikg_in(:,:) = -psikg_in(:,:) ! IS THIS STACK-DANGEROUS?
1563 
1564 ! return RHS  to LA representation, for explicit storage 
1565 call wf_block_distribute(psikb_wrk,  psikg_in,2) ! FFT -> LA 
1566 
1567 ! store data, in LA representation
1568 do mb = 1, blocksize
1569 l = (iblk_lanczos-1)*blocksize + mb
1570 
1571 if ( l <= lmax) then
1572   ! psi_rhs
1573   psi_rhs(:,:,l)    = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k)
1574 end if
1575 
1576 end do !mb 
1577 
1578 !----------------------------------------------------------
1579 ! iii) extract solutions for all projection frequencies
1580 !----------------------------------------------------------
1581 write(io_unit_log,20) '          -- Extract solutions for all projection frequencies...'
1582 flush(io_unit_log)
1583 
1584 
1585 
1586 do iw = 1, nfrequencies
1587 
1588 omega0 = list_projection_frequencies(iw)
1589 ! Solve Sternheimer equation
1590 
1591 projection = 0
1592 if(omega0 < 1d-12) projection=1
1593 
1594 ! solve A x = b, over the whole lanczos block
1595 call sqmr(psikg_in, psikg_out, eig(v), projection, omega0, omega_is_imaginary)
1596 
1597 ! return LA representation, for explicit storage 
1598 call wf_block_distribute(psikb_wrk,  psikg_out, 2) ! FFT -> LA 
1599 
1600 do mb = 1, blocksize
1601 l = (iblk_lanczos-1)*blocksize + mb
1602 
1603 if ( l <= lmax) then
1604   ls = (l-1)*nfrequencies+iw
1605 
1606   psik_wrk(:,:)     = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k)
1607 
1608   c_sternheimer_solutions(:,ls)= cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:)
1609 end if
1610 
1611 end do ! mb
1612 
1613 end do ! iw
1614 
1615 !----------------------------------------------------------
1616 ! iv) Compute the conjugated, projected modified basis
1617 !----------------------------------------------------------
1618 
1619 write(io_unit_log,20) '          -- compute the conjugated, projected modified basis...'
1620 flush(io_unit_log)
1621 
1622 
1623 
1624 
1625 ! Compute the real space product, | (V^1/2. l) phi_v^* > and return to k space, in FFT configuration
1626 call gr_to_g(psikg_wrk, psir_valence, psikg_VL)
1627 
1628 ! project on conduction states
1629 call pc_k_valence_kernel(psikg_wrk)
1630 
1631 ! return to LA representation
1632 call wf_block_distribute(psikb_wrk,  psikg_wrk,2) ! FFT -> LA 
1633 
1634 ! store back, in LA configuration
1635 do mb = 1, blocksize
1636 l = (iblk_lanczos-1)*blocksize + mb
1637 
1638 if ( l <= lmax) then
1639   psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k)
1640   local_Lbasis_conjugated(:,l) = cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:)
1641 end if
1642 
1643 end do !mb 
1644 
1645 end do ! iblk_lanczos
1646 
1647 
1648 
1649 write(io_unit_log,10) '   - Read in solutions at w=0 and/or w = infinity, if appropriate...'
1650 flush(io_unit_log)
1651 
1652 ! Begin with the storage of the solutions at $\omega = 0.$, which are free.
1653 if(dtset%gwls_recycle == 1) then 
1654   c_sternheimer_solutions(:,lsolutions_max-2*lmax+1:lsolutions_max-lmax) = cmplx_1*Sternheimer_solutions_zero(1,:,:,v) + &
1655   &                                                                             cmplx_i*Sternheimer_solutions_zero(2,:,:,v)
1656 end if
1657 if(dtset%gwls_recycle == 2) then
1658   do i=1,lmax
1659   recy_i = (i-1)*nbandv + v       
1660   !BUG : On petrus, NAG 5.3.1 + OpenMPI 1.6.2 cause read(...,rec=i) to read the data written by write(...,rec=i+1).
1661   read(recy_unit,rec=recy_i) psik_wrk
1662   c_sternheimer_solutions(:,lsolutions_max-2*lmax+i) = cmplx_1*psik_wrk(1,:) + cmplx_i*psik_wrk(2,:)
1663   end do
1664 end if
1665 
1666 ! and then continue with the storage of the vectors on which the Sternheimer solutions will be projected.
1667 c_sternheimer_solutions(:,lsolutions_max-lmax+1:lsolutions_max) = cmplx_1*psi_rhs(1,:,:) + cmplx_i*psi_rhs(2,:,:)
1668 ! Previously was = local_Lbasis; but analysis in the Lanczos article reveals psi_rhs should be better.
1669 ! Furthermore, tests show that, with psi_rhs, silane@1Ha has Sigma_c 0.01mHa away from the result with 
1670 ! gwls_list_proj_freq 0.0 1.0, in contrast with local_Lbasis, which has Sigma_c 0.3mHa away from the same result.
1671 
1672 if ( model ) then
1673 
1674   write(io_unit_log,10) '   - USE MODEL: model = .true., hence compute model model dielectric matrix...'
1675   flush(io_unit_log)
1676 
1677 
1678 
1679   !--------------------------------------------------------------------------
1680   ! Now that we have the modified basis, compute the matrix
1681   ! elements of the model dielectric operator
1682   !
1683   ! CAREFUL!
1684   !
1685   !        The model is given by 
1686   !
1687   !                P_model(iw) = sum_{v} phi_v(r) P_c.Y(iw).P_c phi_v^*(r')
1688   !
1689   !    such that        
1690   !
1691   !    <l1 | eps_model(iw) | l2 >  = delta_{l1,l2} 
1692   !    - sum_{v} < (V^{1/2}.l1).phi_v^*| Pc . Y . Pc | (V^{1/2}.l2).phi_v^* >
1693   !
1694   ! But local_Lbasis defined above corresponds to
1695   !                                Pc | (V^{1/2} .l )^* phi_v >.
1696   !
1697   ! This is why we must define local_Lbasis_conjugated, of the form
1698   !                                Pc | (V^{1/2} .l ) phi_v^* >.
1699   !
1700   !--------------------------------------------------------------------------
1701 
1702   do iw = 1, npt_gauss+1
1703 
1704   call setup_Pk_model(list_omega(iw),second_model_parameter)
1705 
1706   ! Only build the lower triangular part; the upper triangular part is obtained from the Hermitian conjugate
1707   do l1 = 1, lmax
1708 
1709   YL(:) = model_Y_LA(:)*local_Lbasis_conjugated(:,l1)
1710   do l2 = 1, l1
1711   model_dielectric_Lanczos_basis(l1,l2,iw) = model_dielectric_Lanczos_basis(l1,l2,iw)  &
1712   -complex_vector_product(YL, local_Lbasis_conjugated(:,l2),npw_k)
1713 
1714   end do
1715   end do
1716 
1717   end do ! iw
1718 
1719 
1720 end if
1721 
1722 
1723 !--------------------------------------------------------------------------
1724 ! Check explicitly that solutions satisfy the Sternheimer equations
1725 !--------------------------------------------------------------------------
1726 
1727 if ( debug ) then
1728 
1729   if (write_debug) then
1730     write(io_unit,10) "#--------------------------------------------------------------------------------"
1731     write(io_unit,10) "# Check explicitly that solutions satisfy the Sternheimer equation.              "
1732     write(io_unit,10) "#                                                                                "
1733     write(io_unit,10) "# Define:                                                                        "
1734     write(io_unit,10) "# E_l = || (omega^2+[H-Ev]^2) |phi_l> + Pc.[H-Ev].Pc |(V^1/2.q_l)^*.phi_v >  ||  "
1735     write(io_unit,10) "#--------------------------------------------------------------------------------"
1736     write(io_unit,10) '#  l                Im[omega] (Ha)                  E_l'
1737     write(io_unit,10) "#--------------------------------------------------------------------------------"
1738     flush(io_unit)
1739   end if
1740 
1741 
1742   do iblk_lanczos = 1, nbdblock_lanczos
1743 
1744   ! loop on all states within this block
1745   do mb = 1, blocksize
1746 
1747   l = (iblk_lanczos-1)*blocksize + mb
1748 
1749 
1750   if ( l <= lmax) then
1751     ! psik_wrk = | (V^{1/2} .l )^* phi_v >
1752     psik_wrk(1,:) = dble (local_Lbasis(:,l))
1753     psik_wrk(2,:) = dimag(local_Lbasis(:,l))
1754   else
1755     psik_wrk(:,:) = zero
1756   end if
1757 
1758   ! Store in array of blocks of wavefunctions
1759   psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:)
1760   end do ! mb
1761 
1762   ! Transform to FFT representation
1763   call wf_block_distribute(psikb_wrk,  psikg_wrk, 1) ! LA -> FFT 
1764 
1765   !  Create right-hand-side of Sternheimer equation
1766   call pc_k_valence_kernel(psikg_wrk)
1767   call Hpsik(psikg_in,psikg_wrk,eig(v))
1768   call pc_k_valence_kernel(psikg_in)
1769   psikg_in(:,:)  = -psikg_in(:,:) ! IS THIS STACK-DANGEROUS?
1770 
1771   do iw = 1, nfrequencies
1772   ! loop on all states within this block
1773   do mb = 1, blocksize
1774 
1775   l = (iblk_lanczos-1)*blocksize + mb
1776 
1777   ls = (l-1)*nfrequencies+iw
1778 
1779   if ( l <= lmax) then
1780     psik_wrk(1,:) = dble (c_sternheimer_solutions(:,ls))
1781     psik_wrk(2,:) = dimag(c_sternheimer_solutions(:,ls))
1782   else
1783     psik_wrk(:,:) = zero
1784   end if
1785 
1786   ! Store in array of blocks of wavefunctions
1787   psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:)
1788   end do
1789 
1790   ! Transform to FFT representation
1791   call wf_block_distribute(psikb_wrk,  psikg_wrk, 1) ! LA -> FFT 
1792 
1793 
1794   omega0 = list_projection_frequencies(iw)
1795 
1796   psikg_out(:,:) = omega0**2*psikg_wrk(:,:)
1797 
1798   call Hpsik(psikg_wrk,cte=eig(v))
1799   call Hpsik(psikg_wrk,cte=eig(v))
1800 
1801 
1802   psikg_out(:,:) = psikg_out(:,:) + psikg_wrk(:,:)-psikg_in(:,:)
1803   ! psikg_out now contains [ w0^2 + [H-epsilon_v]^2 ] | x > - |RHS>, in FFT configuration.
1804 
1805   ! bring it back to LA configuration
1806 
1807   ! Transform to FFT representation
1808   call wf_block_distribute(psikb_wrk,  psikg_out, 2) ! FFT -> LA 
1809 
1810   do mb = 1, blocksize
1811   l = (iblk_lanczos-1)*blocksize + mb
1812 
1813   if ( l <= lmax) then
1814     psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k)
1815 
1816     z(:) = cg_zdotc(npw_k ,psik_wrk, psik_wrk)
1817 
1818     call xmpi_sum(z, mpi_communicator,ierr) ! sum on all processors working on FFT!
1819     if (write_debug) write(io_unit,21)  l, omega0, sqrt(z(1))
1820   end if
1821   end do ! mb
1822 
1823   end do ! iw
1824   end do ! iblk_lanczos 
1825 
1826 end if
1827 
1828 !--------------------------------------------------------------------------
1829 ! Step 5: Perform a singular value decomposition to extract a
1830 !         linearly independent basis for the solution space.
1831 !
1832 !--------------------------------------------------------------------------
1833 write(io_unit_log,10) '   - Perform SVD to extract linearly independent basis to Sternheimer equation...'
1834 flush(io_unit_log)
1835 
1836 
1837 
1838 
1839 svd_matrix(:,:) =  c_sternheimer_solutions(:,:)
1840 
1841 call extract_SVD(mpi_communicator, npw_k,lsolutions_max,svd_matrix,svd_values)
1842 
1843 if ( write_debug ) then
1844   write(io_unit,10) "#--------------------------------------------------------------------------------"
1845   write(io_unit,10) "# Check the singular value decomposition of the arrays"
1846   write(io_unit,10) '#  l                          svd'
1847   write(io_unit,10) "#--------------------------------------------------------------------------------"
1848   flush(io_unit)
1849 end if
1850 
1851 lsolutions = 0
1852 QR_orthonormal_basis(:,:) = cmplx_0
1853 do l=1, lsolutions_max
1854 if (svd_values(l) > svd_tolerance ) then
1855   lsolutions = lsolutions + 1
1856 
1857   if ( write_debug ) then
1858     write(io_unit,14)   l,svd_values(l)
1859     flush(io_unit)
1860   end if
1861   QR_orthonormal_basis(:,l) = svd_matrix(:,l)
1862 
1863 else
1864   if ( write_debug ) then
1865     write(io_unit,15)   l,svd_values(l),' SVD value too small! Vector to be discarded!'
1866     flush(io_unit)
1867   end if
1868 end if
1869 end do
1870 
1871 !--------------------------------------------------------------------------
1872 ! Step 6: project all relevant arrays onto the newly defined orthonormal
1873 !         basis.
1874 !--------------------------------------------------------------------------
1875 write(io_unit_log,10) '   - Compute the B matrix...'
1876 flush(io_unit_log)
1877 
1878 if (debug) then
1879   check_matrix(:,:) = cmplx_0
1880   do l = 1, lsolutions
1881   check_matrix(l,l) = -cmplx_1
1882   end do
1883 end if
1884 
1885 ABI_ALLOCATE(ipiv                ,(lsolutions))
1886 ABI_ALLOCATE(sternheimer_A0      ,(lsolutions,lsolutions))
1887 ABI_ALLOCATE(sternheimer_B       ,(lsolutions,lmax))
1888 ABI_ALLOCATE(sternheimer_G       ,(lmax,lsolutions))
1889 
1890 ! Compute the X matrix and the check_matrix
1891 do l1 = 1, lsolutions
1892 
1893 psi_gamma_l1(1,:) = real (QR_orthonormal_basis(:,l1))
1894 psi_gamma_l1(2,:) = dimag(QR_orthonormal_basis(:,l1))
1895 
1896 do l2 = 1, lsolutions
1897 
1898 if (l2 <= lmax) then
1899   z(:) = cg_zdotc(npw_k, psi_gamma_l1,  psi_rhs(:,:,l2))
1900   call xmpi_sum(z, mpi_communicator,ierr) ! sum on all processors for LA configuration
1901 
1902   sternheimer_B(l1,l2) = cmplx_1*z(1)+cmplx_i*z(2)
1903 end if
1904 
1905 if (debug)  then
1906   psi_gamma_l2(1,:) = dble (QR_orthonormal_basis(:,l2))
1907   psi_gamma_l2(2,:) = dimag(QR_orthonormal_basis(:,l2))
1908 
1909   z(:) = cg_zdotc(npw_k, psi_gamma_l1,  psi_gamma_l2)  
1910   call xmpi_sum(z, mpi_communicator,ierr) ! sum on all processors 
1911 
1912   check_matrix(l1,l2) = check_matrix(l1,l2) + cmplx_1*z(1)+cmplx_i*z(2)
1913 end if
1914 
1915 end do ! l2
1916 end do ! l1
1917 
1918 
1919 ! Number of blocks of solution vectors
1920 nbdblock_solutions = lsolutions/blocksize
1921 
1922 if (modulo(lsolutions,blocksize) /= 0) nbdblock_solutions = nbdblock_solutions + 1
1923 
1924 
1925 write(io_unit_log,10) '   - Compute the A0 matrix...'
1926 flush(io_unit_log)
1927 
1928 ! Compute the A matrix
1929 do iblk_solutions =1, nbdblock_solutions 
1930 
1931 do mb = 1, blocksize
1932 l2 = (iblk_solutions-1)*blocksize + mb
1933 
1934 if ( l2 <= lsolutions) then
1935   psik_wrk(1,:) = dble (QR_orthonormal_basis(:,l2))
1936   psik_wrk(2,:) = dimag(QR_orthonormal_basis(:,l2))
1937 else
1938   psik_wrk(:,:) =  zero
1939 end if
1940 
1941 ! Store in array of blocks of wavefunctions
1942 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:)
1943 end do
1944 
1945 ! Transform to FFT representation
1946 call wf_block_distribute(psikb_wrk,  psikg_wrk,1) ! LA -> FFT 
1947 
1948 ! act twice with the Hamiltonian operator
1949 call Hpsik(psikg_out,psikg_wrk,eig(v))
1950 call Hpsik(psikg_out,cte=eig(v))
1951 
1952 ! return to LA representation
1953 call wf_block_distribute(psikb_wrk,  psikg_out,2) ! FFT -> LA
1954 
1955 do mb = 1, blocksize
1956 l2 = (iblk_solutions-1)*blocksize + mb
1957 
1958 if ( l2 <= lsolutions) then
1959   psik_wrk(:,:)     = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k)
1960 else
1961   psik_wrk(:,:)     = zero
1962 end if
1963 
1964 do l1 = 1, lsolutions
1965 
1966 psi_gamma_l1(1,:) = real (QR_orthonormal_basis(:,l1))
1967 psi_gamma_l1(2,:) = dimag(QR_orthonormal_basis(:,l1))
1968 
1969 z(:) = cg_zdotc(npw_k, psi_gamma_l1,  psik_wrk)
1970 call xmpi_sum(z,mpi_communicator,ierr) ! sum on all processors working on FFT!
1971 
1972 
1973 if ( l2 <= lsolutions ) then
1974   sternheimer_A0(l1,l2) = cmplx_1*z(1)+cmplx_i*z(2)
1975 end if
1976 
1977 
1978 end do ! l1
1979 end do ! mb
1980 end do ! iblk_solutions 
1981 
1982 
1983 if (debug) then
1984   ! HERE we use a dummy variable to avoid operations which might blow the stack!
1985   ! Stack overflows lead to hard-to-find bugs; let's avoid putting them in here.
1986   ABI_ALLOCATE(dummy_tmp_1,(lsolutions,lsolutions))
1987   ABI_ALLOCATE(dummy_tmp_2,(lsolutions,lsolutions))
1988 
1989 
1990   dummy_tmp_1(:,:)= transpose(sternheimer_A0(:,:))
1991   dummy_tmp_2(:,:)= conjg(dummy_tmp_1(:,:))
1992 
1993   check_matrix2(:,:) = sternheimer_A0(:,:)-dummy_tmp_2(:,:)
1994 
1995   ABI_DEALLOCATE(dummy_tmp_1)
1996   ABI_DEALLOCATE(dummy_tmp_2)
1997 end if
1998 
1999 write(io_unit_log,10) '   - Compute the GAMMA matrix...'
2000 flush(io_unit_log)
2001 
2002 
2003 ! Compute the GAMMA matrices
2004 do l1 = 1, lmax
2005 
2006 ! psik_wrk = | (V^{1/2} . l )^* phi_v >
2007 psik_wrk(1,:) = dble (local_Lbasis(:,l1) )
2008 psik_wrk(2,:) = dimag(local_Lbasis(:,l1))
2009 
2010 
2011 do l2 = 1, lsolutions
2012 
2013 psi_gamma_l2(1,:) = real (QR_orthonormal_basis(:,l2))
2014 psi_gamma_l2(2,:) = dimag(QR_orthonormal_basis(:,l2))
2015 
2016 
2017 ! Note that G_{lJ} = < l | Vc^{1/2}.(gamma_J^*.phi_v)>
2018 !                  = < gamma_J | (Vc^{1/2}.l^*).phi_v >
2019 
2020 z(:) = cg_zdotc(npw_k,psi_gamma_l2, psik_wrk)
2021 call xmpi_sum(z,mpi_communicator,ierr) ! sum on all processors working on FFT!
2022 sternheimer_G(l1,l2) = cmplx_1*z(1)+cmplx_i*z(2)
2023 
2024 end do ! l1
2025 end do ! l2
2026 
2027 
2028 
2029 if ( write_debug ) then
2030   write(io_unit,19)   '<gamma| gamma>', sqrt(sum(abs(check_matrix(:,:))**2))
2031   write(io_unit,19)   '  M hermitian ',sqrt(sum(abs(check_matrix2(:,:))**2))
2032   write(io_unit,10)   " "
2033   write(io_unit,10)   "# GAMMA Matrix:"
2034   write(io_unit,10)   " "
2035 
2036   do l1 = 1, lmax
2037   write(io_unit,30) sternheimer_G(l1,:)
2038   end do
2039   flush(io_unit)
2040 end if
2041 
2042 !--------------------------------------------------------------------------
2043 ! Step 7: Compute the solutions
2044 !--------------------------------------------------------------------------
2045 
2046 write(io_unit_log,10) '   - Compute the Projected Sternheimer solutions and build approximate dielectric operator...'
2047 flush(io_unit_log)
2048 
2049 
2050 
2051 ABI_ALLOCATE(sternheimer_A ,(lsolutions,lsolutions))
2052 ABI_ALLOCATE(sternheimer_X       ,(lsolutions,lmax))
2053 do iw = 2, npt_gauss + 1
2054 
2055 write(io_unit_log,23) '        -- iw = ',iw,' / ',npt_gauss+1
2056 flush(io_unit_log)
2057 
2058 
2059 omega = list_omega(iw)
2060 
2061 sternheimer_A(:,:) = sternheimer_A0(:,:) 
2062 
2063 do l = 1, lsolutions
2064 sternheimer_A(l,l) = sternheimer_A(l,l) + omega**2
2065 end do
2066 
2067 sternheimer_X(:,:) = sternheimer_B(:,:) 
2068 
2069 !--------------------------------------------------------------------------
2070 ! Step 2:  solve A*X = B, a projected form of the Sternheimer equation
2071 !--------------------------------------------------------------------------
2072 write(io_unit_log,10) '        -- Solve A*X = B'
2073 flush(io_unit_log)
2074 
2075 
2076 
2077 call cpu_time(time1)
2078 call zgesv(lsolutions,      & ! number of rows of A matrix
2079 lmax,            & ! number of columns of B matrix
2080 sternheimer_A,   & ! The A matrix on input, the LU factorization on output
2081 lsolutions,      & ! leading dimension of A
2082 ipiv,            & ! array of pivots
2083 sternheimer_X,   & ! B matrix on input, solution X on output
2084 lsolutions,      & ! leading dimension of B
2085 info )
2086 call cpu_time(time2)
2087 
2088 time_exact = time_exact + time2-time1
2089 
2090 !--------------------------------------------------------------------------
2091 ! Step 3: Add contribution to projected epsilon
2092 !--------------------------------------------------------------------------
2093 ! perform E = E  -4 sum_l Gamma_v*X^*_v
2094 
2095 write(io_unit_log,10) '        -- add contribution to dielectric matrix at this frequency'
2096 flush(io_unit_log)
2097 
2098 
2099 
2100 ABI_ALLOCATE(dummy_tmp_1,(lsolutions,lmax))
2101 dummy_tmp_1(:,:) = conjg(sternheimer_X) ! DO THIS to avoid potential stack problems
2102 
2103 call cpu_time(time1)
2104 call zgemm(     'N',    & ! A matrix is in normal order
2105 'N',    & ! B matrix is in normal order
2106 lmax,    & ! number of rows of A
2107 lmax,    & ! number of columns of B
2108 lsolutions,    & ! number of columns of A
2109 -4.0_dp*cmplx_1,    & ! premultiply A*B by this scalar
2110 sternheimer_G,    & ! GAMMA matrix
2111 lmax,    & ! leading dimension of A
2112 dummy_tmp_1,    & ! B matrix
2113 lsolutions,    & ! leading dimension of B
2114 cmplx_1,    & ! beta  is one
2115 projected_dielectric_Lanczos_basis(:,:,iw),    & ! C matrix
2116 lmax)      ! leading dimension of C
2117 
2118 ABI_DEALLOCATE(dummy_tmp_1)
2119 
2120 call cpu_time(time2)
2121 time_exact = time_exact + time2-time1
2122 
2123 end do ! iw
2124 
2125 write(io_unit_log,10) '   - Deallocate tmp arrays...'
2126 flush(io_unit_log)
2127 
2128 
2129 
2130 
2131 ABI_DEALLOCATE(ipiv           )
2132 ABI_DEALLOCATE(sternheimer_A  )
2133 ABI_DEALLOCATE(sternheimer_A0 )
2134 ABI_DEALLOCATE(sternheimer_B  )
2135 ABI_DEALLOCATE(sternheimer_X  )
2136 ABI_DEALLOCATE(sternheimer_G  )
2137 
2138 end do ! v
2139 
2140 !--------------------------------------------------------------------------
2141 ! Finalize, post v loop
2142 !--------------------------------------------------------------------------
2143 write(io_unit_log,10) " - Finalize, after band iterations...."
2144 flush(io_unit_log)
2145 
2146 
2147 
2148 
2149 
2150 timing_string = "#        Exact Sector :   "
2151 call write_timing_log(timing_string,time_exact)
2152 
2153 
2154 ABI_ALLOCATE(dummy_tmp_1,(lmax,lmax))
2155 ABI_ALLOCATE(dummy_tmp_2,(lmax,lmax))
2156 
2157 do iw = 2, npt_gauss+1
2158 ! finally, make sure matrix is hermitian
2159 
2160 ! play this little game to avoid STACK problems
2161 dummy_tmp_1(:,:) = 0.5_dp*transpose(projected_dielectric_Lanczos_basis(:,:,iw))
2162 dummy_tmp_2(:,:) = conjg(dummy_tmp_1(:,:))
2163 dummy_tmp_1(:,:) = dummy_tmp_2(:,:) +0.5_dp*projected_dielectric_Lanczos_basis(:,:,iw)
2164 
2165 projected_dielectric_Lanczos_basis(:,:,iw) =    dummy_tmp_1(:,:)
2166 end do
2167 
2168 ABI_DEALLOCATE(dummy_tmp_1)
2169 ABI_DEALLOCATE(dummy_tmp_2)
2170 
2171 
2172 
2173 if ( write_debug ) then
2174   !write some results to a file
2175 
2176   write(io_unit,10) '#===================================================================================================='
2177   write(io_unit,10) "#                     Projected dielectric matrices                                                  "
2178   write(io_unit,10) "#                     -------------------------------------------------------                        "
2179   write(io_unit,10) "# This file contains the various projected dielectric matrices as a function of frequency.           "
2180   write(io_unit,10) '#===================================================================================================='
2181   write(io_unit,10) ''
2182   flush(io_unit)
2183 
2184   do iw = 1, npt_gauss+1
2185   write(io_unit,10) "#"
2186   write(io_unit,12) "# omega = ",list_omega(iw), " i Ha"
2187   write(io_unit,10) "#"
2188 
2189   do l =1, lmax
2190   write(io_unit,30)  projected_dielectric_Lanczos_basis(l,:,iw)
2191   end do 
2192   end do 
2193 
2194 end if
2195 
2196 
2197 if ( model ) then
2198 
2199 
2200   ! Add all components on the processors
2201   call xmpi_sum(model_dielectric_Lanczos_basis,mpi_communicator,ierr) ! sum on all processors
2202 
2203   ! add the identity
2204   do iw = 1, npt_gauss+1
2205   do l= 1, lmax
2206   model_dielectric_Lanczos_basis(l,l,iw) = model_dielectric_Lanczos_basis(l,l,iw) + cmplx_1
2207   end do
2208   end do 
2209 
2210   ! hermitian the operator
2211   do iw = 1, npt_gauss+1
2212 
2213   do l1 = 1, lmax
2214   do l2 = 1, l1
2215   ! operator is hermitian
2216   model_dielectric_Lanczos_basis(l2,l1,iw) = conjg(model_dielectric_Lanczos_basis(l1,l2,iw))
2217   end do
2218   end do
2219 
2220   end do ! iw
2221 
2222 end if
2223 
2224 
2225 
2226 if ( write_debug ) then
2227   close(io_unit)
2228 end if
2229 
2230 write(io_unit_log,10) " - Deallocate and exit...."
2231 flush(io_unit_log)
2232 
2233 
2234 
2235 if (debug) then
2236   ABI_DEALLOCATE(check_matrix)
2237   ABI_DEALLOCATE(check_matrix2)
2238 end if
2239 
2240 
2241 ABI_DEALLOCATE(psikg_valence)
2242 ABI_DEALLOCATE(psir_valence)
2243 ABI_DEALLOCATE(psikg_VL)
2244 
2245 
2246 ABI_DEALLOCATE(YL)
2247 
2248 ABI_DEALLOCATE(local_Lbasis_conjugated)
2249 ABI_DEALLOCATE(local_Lbasis)
2250 
2251 
2252 ABI_DEALLOCATE(svd_matrix)
2253 ABI_DEALLOCATE(svd_values)
2254 ABI_DEALLOCATE(psi_gamma_l1)
2255 ABI_DEALLOCATE(psi_gamma_l2)
2256 
2257 ABI_DEALLOCATE(psikg_in)
2258 ABI_DEALLOCATE(psikg_out)
2259 
2260 ABI_DEALLOCATE(psi_rhs)
2261 
2262 ABI_DEALLOCATE(c_sternheimer_solutions)
2263 ABI_DEALLOCATE(QR_orthonormal_basis)
2264 
2265 ABI_DEALLOCATE(psik_wrk)
2266 ABI_DEALLOCATE(psikb_wrk)
2267 ABI_DEALLOCATE(psikg_wrk)
2268 
2269 
2270 close(io_unit_log)
2271 
2272 
2273 10 format(A)
2274 12 format(A,F12.8,A)
2275 14 format(I5,10X,ES24.12)
2276 15 format(I5,10X,ES24.12,10X,A)
2277 
2278 19 format(20X,A,15X,E24.16)
2279 
2280 20 format(A,I5)
2281 21 format(I5,10X,F8.4,15X,ES24.12)
2282 22 format(A,I5,A)
2283 23 format(A,I5,A,I5)
2284 30 format(2X,1000(ES12.4,2X,ES12.4,5X))
2285 
2286 end subroutine ProjectedSternheimerEpsilon