TABLE OF CONTENTS


ABINIT/pw_orthon [ Functions ]

[ Top ] [ Functions ]

NAME

 pw_orthon

FUNCTION

 Normalize nvec complex vectors each of length nelem and then orthogonalize by modified Gram-Schmidt.
 Two orthogonality conditions are available:
  Simple orthogonality: ${<Vec_{i}|Vec_{j}>=Delta_ij}$
  Orthogonality with overlap S: ${<Vec_{i}|S|Vec_{j}>=Delta_ij}$

COPYRIGHT

 Copyright (C) 1998-2018 ABINIT group (DCA,XG,GMR,FF,MT)
 This file is distributed under the terms of the
 GNU General Public License,see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt.

INPUTS

  icg=shift to be given to the location of the data in cg(=vecnm)
  igsc=shift to be given to the location of the data in gsc(=ovl_vecnm)
  istwf_k=option parameter that describes the storage of wfs
  mcg=maximum size of second dimension of cg(=vecnm)
  mgsc=maximum size of second dimension of gsc(=ovl_vecnm)
  nelem=number of complex elements in each vector
  nvec=number of vectors to be orthonormalized
  ortalgo= option for the choice of the algorithm
         -1: no orthogonalization (direct return)
          0 or 2: old algorithm (use of buffers)
          1: new algorithm (use of blas)
          3: new new algorithm (use of lapack without copy)
  useoverlap=select the orthogonality condition
               0: no overlap between vectors
               1: vectors are overlapping
  me_g0=1 if this processor has G=0, 0 otherwise
  comm=MPI communicator

SIDE EFFECTS

  vecnm= input: vectors to be orthonormalized; array of nvec column
                vectors,each of length nelem,shifted by icg
                This array is complex or else real(dp) of twice length
         output: orthonormalized set of vectors
  if (useoverlap==1) only:
    ovl_vecnm= input: product of overlap and input vectors:
                      S|vecnm>,where S is the overlap operator
               output: updated S|vecnm> according to vecnm

NOTES

 Note that each vector has an arbitrary phase which is not fixed in this routine.

 WARNING : not yet suited for nspinor=2 with istwfk/=1

PARENTS

      lapackprof,vtowfk,wfconv

CHILDREN

      abi_xcopy,abi_xorthonormalize,abi_xtrsm,cgnc_cholesky,cgnc_gramschmidt
      cgpaw_cholesky,cgpaw_gramschmidt,ortho_reim,timab,xmpi_sum

SOURCE

 61 #if defined HAVE_CONFIG_H
 62 #include "config.h"
 63 #endif
 64 
 65 #include "abi_common.h"
 66 
 67 subroutine pw_orthon(icg,igsc,istwf_k,mcg,mgsc,nelem,nvec,ortalgo,ovl_vecnm,useoverlap,vecnm,me_g0,comm)
 68 
 69  use defs_basis
 70  use m_errors
 71  use m_profiling_abi
 72  use m_xmpi
 73  use m_cgtools
 74  use m_abi_linalg
 75 
 76 !This section has been created automatically by the script Abilint (TD).
 77 !Do not modify the following lines by hand.
 78 #undef ABI_FUNC
 79 #define ABI_FUNC 'pw_orthon'
 80  use interfaces_18_timing
 81 !End of the abilint section
 82 
 83  implicit none
 84 
 85 !Arguments ------------------------------------
 86 !scalars
 87  integer,intent(in) :: icg,igsc,istwf_k,mcg,mgsc,nelem,nvec,ortalgo,useoverlap,me_g0,comm
 88 !arrays
 89  real(dp),intent(inout) :: ovl_vecnm(2,mgsc*useoverlap),vecnm(2,mcg)
 90 
 91 !Local variables-------------------------------
 92 !scalars
 93  integer :: ierr,ii,ii0,ii1,ii2,ivec,ivec2
 94  integer :: rvectsiz,vectsize,cg_idx,gsc_idx
 95  real(dp) :: doti,dotr,sum,xnorm
 96  character(len=500) :: msg
 97 !arrays
 98  integer :: cgindex(nvec),gscindex(nvec)
 99  real(dp) :: buffer2(2),tsec(2)
100  real(dp),allocatable :: rblockvectorbx(:,:),rblockvectorx(:,:),rgramxbx(:,:)
101  complex(dpc),allocatable :: cblockvectorbx(:,:),cblockvectorx(:,:)
102  complex(dpc),allocatable :: cgramxbx(:,:)
103 
104 ! *************************************************************************
105 
106 #ifdef DEBUG_MODE
107 !Make sure imaginary part at G=0 vanishes
108  if (istwf_k==2) then
109    do ivec=1,nvec
110      if(abs(vecnm(2,1+nelem*(ivec-1)+icg))>zero)then
111 !      if(abs(vecnm(2,1+nelem*(ivec-1)+icg))>tol16)then
112        write(msg,'(2a,3i0,2es16.6,a,a)')&
113 &       ' For istwf_k=2,observed the following element of vecnm :',ch10,&
114 &       nelem,ivec,icg,vecnm(1:2,1+nelem*(ivec-1)+icg),ch10,&
115 &       '  with a non-negligible imaginary part.'
116        MSG_BUG(msg)
117      end if
118    end do
119  end if
120 #endif
121 
122 !Nothing to do if ortalgo=-1
123  if(ortalgo==-1) return
124 
125  do ivec=1,nvec
126    cgindex(ivec)=nelem*(ivec-1)+icg+1
127    gscindex(ivec)=nelem*(ivec-1)+igsc+1
128  end do
129 
130  if (ortalgo==3) then
131 !  =========================
132 !  First (new new) algorithm
133 !  =========================
134 !  NEW VERSION: avoid copies, use ZHERK for NC
135    cg_idx = cgindex(1)
136    if (useoverlap==1) then
137      gsc_idx = gscindex(1)
138      call cgpaw_cholesky(nelem,nvec,vecnm(1,cg_idx),ovl_vecnm(1,gsc_idx),istwf_k,me_g0,comm)
139    else
140      call cgnc_cholesky(nelem,nvec,vecnm(1,cg_idx),istwf_k,me_g0,comm,use_gemm=.FALSE.)
141    end if
142 
143  else if(ortalgo==1) then
144 !  =======================
145 !  Second (new) algorithm
146 !  =======================
147 !  This first algorithm seems to be more efficient especially in the parallel band-FFT mode.
148 
149    if(istwf_k==1) then
150      vectsize=nelem
151      ABI_ALLOCATE(cgramxbx,(nvec,nvec))
152      ABI_ALLOCATE(cblockvectorx,(vectsize,nvec))
153      ABI_ALLOCATE(cblockvectorbx,(vectsize,nvec))
154      call abi_xcopy(nvec*vectsize,vecnm(:,cgindex(1):cgindex(nvec)-1),1,cblockvectorx,1,x_cplx=2)
155      if (useoverlap == 1) then
156        call abi_xcopy(nvec*vectsize,ovl_vecnm(:,gscindex(1):gscindex(nvec)-1),1,cblockvectorbx,1,x_cplx=2)
157      else
158        call abi_xcopy(nvec*vectsize,vecnm(:,cgindex(1):cgindex(nvec)-1),1,cblockvectorbx,1,x_cplx=2)
159      end if
160      call abi_xorthonormalize(cblockvectorx,cblockvectorbx,nvec,comm,cgramxbx,vectsize)
161      call abi_xcopy(nvec*vectsize,cblockvectorx,1,vecnm(:,cgindex(1):cgindex(nvec)-1),1,x_cplx=2)
162      if (useoverlap == 1) then
163        call abi_xtrsm('r','u','n','n',vectsize,nvec,cone,cgramxbx,nvec,cblockvectorbx,vectsize)
164        call abi_xcopy(nvec*vectsize,cblockvectorbx,1,ovl_vecnm(:,gscindex(1):gscindex(nvec)-1),1,x_cplx=2)
165      end if
166      ABI_DEALLOCATE(cgramxbx)
167      ABI_DEALLOCATE(cblockvectorx)
168      ABI_DEALLOCATE(cblockvectorbx)
169 
170    else if(istwf_k==2) then
171      ! Pack real and imaginary part of the wavefunctions.
172      rvectsiz=nelem
173      vectsize=2*nelem; if(me_g0==1) vectsize=vectsize-1
174      ABI_ALLOCATE(rgramxbx,(nvec,nvec))
175      ABI_ALLOCATE(rblockvectorx,(vectsize,nvec))
176      ABI_ALLOCATE(rblockvectorbx,(vectsize,nvec))
177      do ivec=1,nvec
178        if (me_g0 == 1) then
179          call abi_xcopy(1,vecnm(1,cgindex(ivec)),1,rblockvectorx (1,ivec),1)
180          call abi_xcopy(rvectsiz-1,vecnm(1,cgindex(ivec)+1),2,rblockvectorx(2,ivec),1)
181          call abi_xcopy(rvectsiz-1,vecnm(2,cgindex(ivec)+1),2,rblockvectorx(rvectsiz+1,ivec),1)
182          if (useoverlap == 1) then
183            call abi_xcopy(1,ovl_vecnm(1,gscindex(ivec)),1,rblockvectorbx(1,ivec),1)
184            call abi_xcopy(rvectsiz-1,ovl_vecnm(1,gscindex(ivec)+1),2,rblockvectorbx(2,ivec),1)
185            call abi_xcopy(rvectsiz-1,ovl_vecnm(2,gscindex(ivec)+1),2,rblockvectorbx(rvectsiz+1,ivec),1)
186          else
187            call abi_xcopy(1,vecnm(1,cgindex(ivec)),1,rblockvectorbx(1,ivec),1)
188            call abi_xcopy(rvectsiz-1,vecnm(1,cgindex(ivec)+1),2,rblockvectorbx(2,ivec),1)
189            call abi_xcopy(rvectsiz-1,vecnm(2,cgindex(ivec)+1),2,rblockvectorbx(rvectsiz+1,ivec),1)
190          end if
191          rblockvectorx (2:vectsize,ivec)=rblockvectorx (2:vectsize,ivec)*sqrt2
192          rblockvectorbx(2:vectsize,ivec)=rblockvectorbx(2:vectsize,ivec)*sqrt2
193        else
194          call abi_xcopy(rvectsiz,vecnm(1,cgindex(ivec)),2,rblockvectorx(1,ivec),1)
195          call abi_xcopy(rvectsiz,vecnm(2,cgindex(ivec)),2,rblockvectorx(rvectsiz+1,ivec),1)
196          if (useoverlap == 1) then
197            call abi_xcopy(rvectsiz,ovl_vecnm(1,gscindex(ivec)),2,rblockvectorbx(1,ivec),1)
198            call abi_xcopy(rvectsiz,ovl_vecnm(2,gscindex(ivec)),2,rblockvectorbx(rvectsiz+1,ivec),1)
199          else
200            call abi_xcopy(rvectsiz,vecnm(1,cgindex(ivec)),2,rblockvectorbx(1,ivec),1)
201            call abi_xcopy(rvectsiz,vecnm(2,cgindex(ivec)),2,rblockvectorbx(rvectsiz+1,ivec),1)
202          end if
203          rblockvectorx (1:vectsize,ivec)=rblockvectorx (1:vectsize,ivec)*sqrt2
204          rblockvectorbx(1:vectsize,ivec)=rblockvectorbx(1:vectsize,ivec)*sqrt2
205        end if
206      end do
207 
208      call ortho_reim(rblockvectorx,rblockvectorbx,nvec,comm,rgramxbx,vectsize)
209 
210      do ivec=1,nvec
211        ! Unpack results
212        if (me_g0 == 1) then
213          call abi_xcopy(1,rblockvectorx(1,ivec),1,vecnm(1,cgindex(ivec)),1)
214          vecnm(2,cgindex(ivec))=zero
215          rblockvectorx(2:vectsize,ivec)=rblockvectorx(2:vectsize,ivec)/sqrt2
216          call abi_xcopy(rvectsiz-1,rblockvectorx(2,ivec),1,vecnm(1,cgindex(ivec)+1),2)
217          call abi_xcopy(rvectsiz-1,rblockvectorx(rvectsiz+1,ivec),1,vecnm(2,cgindex(ivec)+1),2)
218        else
219          rblockvectorx(1:vectsize,ivec)=rblockvectorx(1:vectsize,ivec)/sqrt2
220          call abi_xcopy(rvectsiz,rblockvectorx(1,ivec),1,vecnm(1,cgindex(ivec)),2)
221          call abi_xcopy(rvectsiz,rblockvectorx(rvectsiz+1,ivec),1,vecnm(2,cgindex(ivec)),2)
222        end if
223 
224        if(useoverlap == 1) then
225          call abi_xtrsm('r','u','n','n',vectsize,nvec,one,rgramxbx,nvec,rblockvectorbx,vectsize)
226          if (me_g0 == 1) then
227            call abi_xcopy(1,rblockvectorbx(1,ivec),1,ovl_vecnm(1,gscindex(ivec)),1)
228            ovl_vecnm(2,gscindex(ivec))=zero
229            rblockvectorbx(2:vectsize,ivec)=rblockvectorbx(2:vectsize,ivec)/sqrt2
230            call abi_xcopy(rvectsiz-1,rblockvectorbx(2,ivec),1,ovl_vecnm(1,gscindex(ivec)+1),2)
231            call abi_xcopy(rvectsiz-1,rblockvectorbx(rvectsiz+1,ivec),1,ovl_vecnm(2,gscindex(ivec)+1),2)
232          else
233            rblockvectorbx(1:vectsize,ivec)=rblockvectorbx(1:vectsize,ivec)/sqrt2
234            call abi_xcopy(rvectsiz,rblockvectorbx(1,ivec),1,ovl_vecnm(1,gscindex(ivec)),2)
235            call abi_xcopy(rvectsiz,rblockvectorbx(rvectsiz+1,ivec),1,ovl_vecnm(2,gscindex(ivec)),2)
236          end if
237        end if
238      end do
239      ABI_DEALLOCATE(rgramxbx)
240      ABI_DEALLOCATE(rblockvectorx)
241      ABI_DEALLOCATE(rblockvectorbx)
242    end if
243 
244  else if (ortalgo==4) then 
245 !  else if (ANY(ortalgo==(/0,2/))) then 
246 
247    cg_idx = cgindex(1)
248    if (useoverlap==0) then 
249      call cgnc_gramschmidt(nelem,nvec,vecnm(1,cg_idx),istwf_k,me_g0,comm)
250    else 
251      gsc_idx = gscindex(1)
252      call cgpaw_gramschmidt(nelem,nvec,vecnm(1,cg_idx),ovl_vecnm(1,gsc_idx),istwf_k,me_g0,comm)
253    end if
254 
255  else if (ANY(ortalgo==(/0,2/))) then 
256 !  =======================
257 !  Third (old) algorithm
258 !  =======================
259 
260    do ivec=1,nvec
261 !    Normalize each vecnm(n,m) in turn:
262 
263      if (useoverlap==1) then ! Using overlap S...
264        if(istwf_k/=2)then
265          sum=zero;ii0=1
266        else
267          if (me_g0 ==1) then
268            sum=half*ovl_vecnm(1,1+nelem*(ivec-1)+igsc)*vecnm(1,1+nelem*(ivec-1)+icg)
269            ii0=2
270          else
271            sum=zero;ii0=1
272          end if
273        end if
274 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:sum) SHARED(icg,ivec,nelem,vecnm)
275        do ii=ii0+nelem*(ivec-1),nelem*ivec
276          sum=sum+vecnm(1,ii+icg)*ovl_vecnm(1,ii+igsc)+vecnm(2,ii+icg)*ovl_vecnm(2,ii+igsc)
277        end do
278 
279      else ! Without overlap...
280        if(istwf_k/=2)then
281          sum=zero;ii0=1
282        else
283          if (me_g0 ==1) then
284            sum=half*vecnm(1,1+nelem*(ivec-1)+icg)**2
285            ii0=2
286          else
287            sum=zero;ii0=1
288          end if
289        end if
290 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:sum) SHARED(icg,ivec,nelem,vecnm)
291        do ii=ii0+nelem*(ivec-1)+icg,nelem*ivec+icg
292          sum=sum+vecnm(1,ii)**2+vecnm(2,ii)**2
293        end do
294      end if
295 
296      call timab(48,1,tsec)
297      call xmpi_sum(sum,comm,ierr)
298      call timab(48,2,tsec)
299 
300      if(istwf_k>=2)sum=two*sum
301      xnorm = sqrt(abs(sum)) ;  sum=1.0_dp/xnorm
302 !$OMP PARALLEL DO PRIVATE(ii) SHARED(icg,ivec,nelem,sum,vecnm)
303      do ii=1+nelem*(ivec-1)+icg,nelem*ivec+icg
304        vecnm(1,ii)=vecnm(1,ii)*sum
305        vecnm(2,ii)=vecnm(2,ii)*sum
306      end do
307      if (useoverlap==1) then
308 !$OMP PARALLEL DO PRIVATE(ii) SHARED(icg,ivec,nelem,sum,ovl_vecnm)
309        do ii=1+nelem*(ivec-1)+igsc,nelem*ivec+igsc
310          ovl_vecnm(1,ii)=ovl_vecnm(1,ii)*sum
311          ovl_vecnm(2,ii)=ovl_vecnm(2,ii)*sum
312        end do
313      end if
314 
315 !    Remove projection in all higher states.
316      if (ivec<nvec) then
317 
318        if(istwf_k==1)then
319 !        Cannot use time-reversal symmetry
320 
321          if (useoverlap==1) then ! Using overlap.
322            do ivec2=ivec+1,nvec
323 !            First compute scalar product
324              dotr=zero ; doti=zero
325              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+igsc
326 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:doti,dotr) SHARED(ii1,ii2,nelem,vecnm)
327              do ii=1,nelem
328                dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii)
329                doti=doti+vecnm(1,ii1+ii)*ovl_vecnm(2,ii2+ii)-vecnm(2,ii1+ii)*ovl_vecnm(1,ii2+ii)
330              end do
331 
332              call timab(48,1,tsec)
333              buffer2(1)=doti;buffer2(2)=dotr
334              call xmpi_sum(buffer2,comm,ierr)
335              call timab(48,2,tsec)
336              doti=buffer2(1)
337              dotr=buffer2(2)
338 
339 !            Then subtract the appropriate amount of the lower state
340              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg
341 #ifdef FC_INTEL
342 !            DIR$ ivdep
343 #endif
344 !$OMP PARALLEL DO PRIVATE(ii) SHARED(doti,dotr,ii1,ii2,nelem,vecnm)
345              do ii=1,nelem
346                vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)+doti*vecnm(2,ii1+ii)
347                vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-doti*vecnm(1,ii1+ii)-dotr*vecnm(2,ii1+ii)
348              end do
349 
350              ii1=nelem*(ivec-1)+igsc;ii2=nelem*(ivec2-1)+igsc
351              do ii=1,nelem
352                ovl_vecnm(1,ii2+ii)=ovl_vecnm(1,ii2+ii)&
353 &               -dotr*ovl_vecnm(1,ii1+ii)&
354 &               +doti*ovl_vecnm(2,ii1+ii)
355                ovl_vecnm(2,ii2+ii)=ovl_vecnm(2,ii2+ii)&
356                -doti*ovl_vecnm(1,ii1+ii)&
357 &               -dotr*ovl_vecnm(2,ii1+ii)
358              end do
359            end do
360          else
361 !          ----- No overlap -----
362            do ivec2=ivec+1,nvec
363 !            First compute scalar product
364              dotr=zero ; doti=zero
365              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg
366 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:doti,dotr) SHARED(ii1,ii2,nelem,vecnm)
367              do ii=1,nelem
368                dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+&
369 &               vecnm(2,ii1+ii)*vecnm(2,ii2+ii)
370                doti=doti+vecnm(1,ii1+ii)*vecnm(2,ii2+ii)-&
371 &               vecnm(2,ii1+ii)*vecnm(1,ii2+ii)
372              end do
373 !            Init mpi_comm
374              buffer2(1)=doti
375              buffer2(2)=dotr
376              call timab(48,1,tsec)
377              call xmpi_sum(buffer2,comm,ierr)
378 !            call xmpi_sum(doti,spaceComm,ierr)
379 !            call xmpi_sum(dotr,spaceComm,ierr)
380              call timab(48,2,tsec)
381              doti=buffer2(1)
382              dotr=buffer2(2)
383 
384 !            Then subtract the appropriate amount of the lower state
385 #ifdef FC_INTEL
386 !            DIR$ ivdep
387 #endif
388 !$OMP PARALLEL DO PRIVATE(ii) SHARED(doti,dotr,ii1,ii2,nelem,vecnm)
389              do ii=1,nelem
390                vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)+&
391 &               doti*vecnm(2,ii1+ii)
392                vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-doti*vecnm(1,ii1+ii)-&
393 &               dotr*vecnm(2,ii1+ii)
394              end do
395            end do
396 
397          end if  ! Test on useoverlap
398 
399        else if(istwf_k==2)then
400 !        At gamma point use of time-reversal symmetry saves cpu time.
401 
402          if (useoverlap==1) then
403 !          ----- Using overlap -----
404            do ivec2=ivec+1,nvec
405 !            First compute scalar product
406              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+igsc
407              if (me_g0 ==1) then
408                dotr=half*vecnm(1,ii1+1)*ovl_vecnm(1,ii2+1)
409 !              Avoid double counting G=0 contribution
410 !              Imaginary part of vecnm at G=0 should be zero,so only take real part
411 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm)
412                do ii=2,nelem
413                  dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+&
414 &                 vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii)
415                end do
416              else
417                dotr=0._dp
418 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm)
419                do ii=1,nelem
420                  dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+&
421 &                 vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii)
422                end do
423              end if
424 
425              dotr=two*dotr
426 
427              call timab(48,1,tsec)
428              call xmpi_sum(dotr,comm,ierr)
429              call timab(48,2,tsec)
430 
431 !            Then subtract the appropriate amount of the lower state
432              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg
433 #ifdef FC_INTEL
434 !            DIR$ ivdep
435 #endif
436 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm)
437              do ii=1,nelem
438                vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)
439                vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii)
440              end do
441              ii1=nelem*(ivec-1)+igsc;ii2=nelem*(ivec2-1)+igsc
442              do ii=1,nelem
443                ovl_vecnm(1,ii2+ii)=ovl_vecnm(1,ii2+ii)-dotr*ovl_vecnm(1,ii1+ii)
444                ovl_vecnm(2,ii2+ii)=ovl_vecnm(2,ii2+ii)-dotr*ovl_vecnm(2,ii1+ii)
445              end do
446            end do
447          else
448 !          ----- No overlap -----
449            do ivec2=ivec+1,nvec
450 !            First compute scalar product
451              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg
452              if (me_g0 ==1) then
453 !              Avoid double counting G=0 contribution
454 !              Imaginary part of vecnm at G=0 should be zero,so only take real part
455                dotr=half*vecnm(1,ii1+1)*vecnm(1,ii2+1)
456 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm)
457                do ii=2,nelem
458                  dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*vecnm(2,ii2+ii)
459                end do
460              else
461                dotr=0._dp
462 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm)
463                do ii=1,nelem
464                  dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*vecnm(2,ii2+ii)
465                end do
466              end if
467              dotr=two*dotr
468 
469              call timab(48,1,tsec)
470              call xmpi_sum(dotr,comm,ierr)
471              call timab(48,2,tsec)
472 
473 !            Then subtract the appropriate amount of the lower state
474 #ifdef FC_INTEL
475 !            DIR$ ivdep
476 #endif
477 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm)
478              do ii=1,nelem
479                vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)
480                vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii)
481              end do
482            end do
483          end if  ! Test on useoverlap
484 
485        else
486 !        At other special points,use of time-reversal symmetry saves cpu time.
487 
488          if (useoverlap==1) then
489 !          ----- Using overlap -----
490            do ivec2=ivec+1,nvec
491 !            First compute scalar product
492              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+igsc
493 !            Avoid double counting G=0 contribution
494 !            Imaginary part of vecnm at G=0 should be zero,so only take real part
495              dotr=zero
496 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm)
497              do ii=1,nelem
498                dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii)
499              end do
500              dotr=two*dotr
501 
502              call timab(48,1,tsec)
503              call xmpi_sum(dotr,comm,ierr)
504              call timab(48,2,tsec)
505              
506 !            Then subtract the appropriate amount of the lower state
507              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg
508 #ifdef FC_INTEL
509 !            DIR$ ivdep
510 #endif
511 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm)
512              do ii=1,nelem
513                vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)
514                vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii)
515              end do
516              ii1=nelem*(ivec-1)+igsc;ii2=nelem*(ivec2-1)+igsc
517              do ii=1,nelem
518                ovl_vecnm(1,ii2+ii)=ovl_vecnm(1,ii2+ii)-dotr*ovl_vecnm(1,ii1+ii)
519                ovl_vecnm(2,ii2+ii)=ovl_vecnm(2,ii2+ii)-dotr*ovl_vecnm(2,ii1+ii)
520              end do
521            end do
522          else
523 !          ----- No overlap -----
524            do ivec2=ivec+1,nvec
525 !            First compute scalar product
526              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg
527 !            Avoid double counting G=0 contribution
528 !            Imaginary part of vecnm at G=0 should be zero,so only take real part
529              dotr=zero
530 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm)
531              do ii=1,nelem
532                dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*vecnm(2,ii2+ii)
533              end do
534              dotr=two*dotr
535 
536              call timab(48,1,tsec)
537              call xmpi_sum(dotr,comm,ierr)
538              call timab(48,2,tsec)
539 
540 !            Then subtract the appropriate amount of the lower state
541 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm)
542              do ii=1,nelem
543                vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)
544                vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii)
545              end do
546            end do
547          end if
548 
549 !        End use of time-reversal symmetry
550        end if
551 
552      end if  ! Test on "ivec"
553 
554 !    end loop over vectors (or bands) with index ivec :
555    end do
556 
557  else 
558    write(msg,'(a,i0)')"Wrong value for ortalgo: ",ortalgo
559    MSG_ERROR(msg)
560  end if ! End of the second algorithm
561 
562 end subroutine pw_orthon