TABLE OF CONTENTS
ABINIT/pw_orthon [ Functions ]
NAME
pw_orthon
FUNCTION
Normalize nvec complex vectors each of length nelem and then orthogonalize by modified Gram-Schmidt. Two orthogonality conditions are available: Simple orthogonality: ${<Vec_{i}|Vec_{j}>=Delta_ij}$ Orthogonality with overlap S: ${<Vec_{i}|S|Vec_{j}>=Delta_ij}$
COPYRIGHT
Copyright (C) 1998-2018 ABINIT group (DCA,XG,GMR,FF,MT) This file is distributed under the terms of the GNU General Public License,see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt.
INPUTS
icg=shift to be given to the location of the data in cg(=vecnm) igsc=shift to be given to the location of the data in gsc(=ovl_vecnm) istwf_k=option parameter that describes the storage of wfs mcg=maximum size of second dimension of cg(=vecnm) mgsc=maximum size of second dimension of gsc(=ovl_vecnm) nelem=number of complex elements in each vector nvec=number of vectors to be orthonormalized ortalgo= option for the choice of the algorithm -1: no orthogonalization (direct return) 0 or 2: old algorithm (use of buffers) 1: new algorithm (use of blas) 3: new new algorithm (use of lapack without copy) useoverlap=select the orthogonality condition 0: no overlap between vectors 1: vectors are overlapping me_g0=1 if this processor has G=0, 0 otherwise comm=MPI communicator
SIDE EFFECTS
vecnm= input: vectors to be orthonormalized; array of nvec column vectors,each of length nelem,shifted by icg This array is complex or else real(dp) of twice length output: orthonormalized set of vectors if (useoverlap==1) only: ovl_vecnm= input: product of overlap and input vectors: S|vecnm>,where S is the overlap operator output: updated S|vecnm> according to vecnm
NOTES
Note that each vector has an arbitrary phase which is not fixed in this routine. WARNING : not yet suited for nspinor=2 with istwfk/=1
PARENTS
lapackprof,vtowfk,wfconv
CHILDREN
abi_xcopy,abi_xorthonormalize,abi_xtrsm,cgnc_cholesky,cgnc_gramschmidt cgpaw_cholesky,cgpaw_gramschmidt,ortho_reim,timab,xmpi_sum
SOURCE
61 #if defined HAVE_CONFIG_H 62 #include "config.h" 63 #endif 64 65 #include "abi_common.h" 66 67 subroutine pw_orthon(icg,igsc,istwf_k,mcg,mgsc,nelem,nvec,ortalgo,ovl_vecnm,useoverlap,vecnm,me_g0,comm) 68 69 use defs_basis 70 use m_errors 71 use m_profiling_abi 72 use m_xmpi 73 use m_cgtools 74 use m_abi_linalg 75 76 !This section has been created automatically by the script Abilint (TD). 77 !Do not modify the following lines by hand. 78 #undef ABI_FUNC 79 #define ABI_FUNC 'pw_orthon' 80 use interfaces_18_timing 81 !End of the abilint section 82 83 implicit none 84 85 !Arguments ------------------------------------ 86 !scalars 87 integer,intent(in) :: icg,igsc,istwf_k,mcg,mgsc,nelem,nvec,ortalgo,useoverlap,me_g0,comm 88 !arrays 89 real(dp),intent(inout) :: ovl_vecnm(2,mgsc*useoverlap),vecnm(2,mcg) 90 91 !Local variables------------------------------- 92 !scalars 93 integer :: ierr,ii,ii0,ii1,ii2,ivec,ivec2 94 integer :: rvectsiz,vectsize,cg_idx,gsc_idx 95 real(dp) :: doti,dotr,sum,xnorm 96 character(len=500) :: msg 97 !arrays 98 integer :: cgindex(nvec),gscindex(nvec) 99 real(dp) :: buffer2(2),tsec(2) 100 real(dp),allocatable :: rblockvectorbx(:,:),rblockvectorx(:,:),rgramxbx(:,:) 101 complex(dpc),allocatable :: cblockvectorbx(:,:),cblockvectorx(:,:) 102 complex(dpc),allocatable :: cgramxbx(:,:) 103 104 ! ************************************************************************* 105 106 #ifdef DEBUG_MODE 107 !Make sure imaginary part at G=0 vanishes 108 if (istwf_k==2) then 109 do ivec=1,nvec 110 if(abs(vecnm(2,1+nelem*(ivec-1)+icg))>zero)then 111 ! if(abs(vecnm(2,1+nelem*(ivec-1)+icg))>tol16)then 112 write(msg,'(2a,3i0,2es16.6,a,a)')& 113 & ' For istwf_k=2,observed the following element of vecnm :',ch10,& 114 & nelem,ivec,icg,vecnm(1:2,1+nelem*(ivec-1)+icg),ch10,& 115 & ' with a non-negligible imaginary part.' 116 MSG_BUG(msg) 117 end if 118 end do 119 end if 120 #endif 121 122 !Nothing to do if ortalgo=-1 123 if(ortalgo==-1) return 124 125 do ivec=1,nvec 126 cgindex(ivec)=nelem*(ivec-1)+icg+1 127 gscindex(ivec)=nelem*(ivec-1)+igsc+1 128 end do 129 130 if (ortalgo==3) then 131 ! ========================= 132 ! First (new new) algorithm 133 ! ========================= 134 ! NEW VERSION: avoid copies, use ZHERK for NC 135 cg_idx = cgindex(1) 136 if (useoverlap==1) then 137 gsc_idx = gscindex(1) 138 call cgpaw_cholesky(nelem,nvec,vecnm(1,cg_idx),ovl_vecnm(1,gsc_idx),istwf_k,me_g0,comm) 139 else 140 call cgnc_cholesky(nelem,nvec,vecnm(1,cg_idx),istwf_k,me_g0,comm,use_gemm=.FALSE.) 141 end if 142 143 else if(ortalgo==1) then 144 ! ======================= 145 ! Second (new) algorithm 146 ! ======================= 147 ! This first algorithm seems to be more efficient especially in the parallel band-FFT mode. 148 149 if(istwf_k==1) then 150 vectsize=nelem 151 ABI_ALLOCATE(cgramxbx,(nvec,nvec)) 152 ABI_ALLOCATE(cblockvectorx,(vectsize,nvec)) 153 ABI_ALLOCATE(cblockvectorbx,(vectsize,nvec)) 154 call abi_xcopy(nvec*vectsize,vecnm(:,cgindex(1):cgindex(nvec)-1),1,cblockvectorx,1,x_cplx=2) 155 if (useoverlap == 1) then 156 call abi_xcopy(nvec*vectsize,ovl_vecnm(:,gscindex(1):gscindex(nvec)-1),1,cblockvectorbx,1,x_cplx=2) 157 else 158 call abi_xcopy(nvec*vectsize,vecnm(:,cgindex(1):cgindex(nvec)-1),1,cblockvectorbx,1,x_cplx=2) 159 end if 160 call abi_xorthonormalize(cblockvectorx,cblockvectorbx,nvec,comm,cgramxbx,vectsize) 161 call abi_xcopy(nvec*vectsize,cblockvectorx,1,vecnm(:,cgindex(1):cgindex(nvec)-1),1,x_cplx=2) 162 if (useoverlap == 1) then 163 call abi_xtrsm('r','u','n','n',vectsize,nvec,cone,cgramxbx,nvec,cblockvectorbx,vectsize) 164 call abi_xcopy(nvec*vectsize,cblockvectorbx,1,ovl_vecnm(:,gscindex(1):gscindex(nvec)-1),1,x_cplx=2) 165 end if 166 ABI_DEALLOCATE(cgramxbx) 167 ABI_DEALLOCATE(cblockvectorx) 168 ABI_DEALLOCATE(cblockvectorbx) 169 170 else if(istwf_k==2) then 171 ! Pack real and imaginary part of the wavefunctions. 172 rvectsiz=nelem 173 vectsize=2*nelem; if(me_g0==1) vectsize=vectsize-1 174 ABI_ALLOCATE(rgramxbx,(nvec,nvec)) 175 ABI_ALLOCATE(rblockvectorx,(vectsize,nvec)) 176 ABI_ALLOCATE(rblockvectorbx,(vectsize,nvec)) 177 do ivec=1,nvec 178 if (me_g0 == 1) then 179 call abi_xcopy(1,vecnm(1,cgindex(ivec)),1,rblockvectorx (1,ivec),1) 180 call abi_xcopy(rvectsiz-1,vecnm(1,cgindex(ivec)+1),2,rblockvectorx(2,ivec),1) 181 call abi_xcopy(rvectsiz-1,vecnm(2,cgindex(ivec)+1),2,rblockvectorx(rvectsiz+1,ivec),1) 182 if (useoverlap == 1) then 183 call abi_xcopy(1,ovl_vecnm(1,gscindex(ivec)),1,rblockvectorbx(1,ivec),1) 184 call abi_xcopy(rvectsiz-1,ovl_vecnm(1,gscindex(ivec)+1),2,rblockvectorbx(2,ivec),1) 185 call abi_xcopy(rvectsiz-1,ovl_vecnm(2,gscindex(ivec)+1),2,rblockvectorbx(rvectsiz+1,ivec),1) 186 else 187 call abi_xcopy(1,vecnm(1,cgindex(ivec)),1,rblockvectorbx(1,ivec),1) 188 call abi_xcopy(rvectsiz-1,vecnm(1,cgindex(ivec)+1),2,rblockvectorbx(2,ivec),1) 189 call abi_xcopy(rvectsiz-1,vecnm(2,cgindex(ivec)+1),2,rblockvectorbx(rvectsiz+1,ivec),1) 190 end if 191 rblockvectorx (2:vectsize,ivec)=rblockvectorx (2:vectsize,ivec)*sqrt2 192 rblockvectorbx(2:vectsize,ivec)=rblockvectorbx(2:vectsize,ivec)*sqrt2 193 else 194 call abi_xcopy(rvectsiz,vecnm(1,cgindex(ivec)),2,rblockvectorx(1,ivec),1) 195 call abi_xcopy(rvectsiz,vecnm(2,cgindex(ivec)),2,rblockvectorx(rvectsiz+1,ivec),1) 196 if (useoverlap == 1) then 197 call abi_xcopy(rvectsiz,ovl_vecnm(1,gscindex(ivec)),2,rblockvectorbx(1,ivec),1) 198 call abi_xcopy(rvectsiz,ovl_vecnm(2,gscindex(ivec)),2,rblockvectorbx(rvectsiz+1,ivec),1) 199 else 200 call abi_xcopy(rvectsiz,vecnm(1,cgindex(ivec)),2,rblockvectorbx(1,ivec),1) 201 call abi_xcopy(rvectsiz,vecnm(2,cgindex(ivec)),2,rblockvectorbx(rvectsiz+1,ivec),1) 202 end if 203 rblockvectorx (1:vectsize,ivec)=rblockvectorx (1:vectsize,ivec)*sqrt2 204 rblockvectorbx(1:vectsize,ivec)=rblockvectorbx(1:vectsize,ivec)*sqrt2 205 end if 206 end do 207 208 call ortho_reim(rblockvectorx,rblockvectorbx,nvec,comm,rgramxbx,vectsize) 209 210 do ivec=1,nvec 211 ! Unpack results 212 if (me_g0 == 1) then 213 call abi_xcopy(1,rblockvectorx(1,ivec),1,vecnm(1,cgindex(ivec)),1) 214 vecnm(2,cgindex(ivec))=zero 215 rblockvectorx(2:vectsize,ivec)=rblockvectorx(2:vectsize,ivec)/sqrt2 216 call abi_xcopy(rvectsiz-1,rblockvectorx(2,ivec),1,vecnm(1,cgindex(ivec)+1),2) 217 call abi_xcopy(rvectsiz-1,rblockvectorx(rvectsiz+1,ivec),1,vecnm(2,cgindex(ivec)+1),2) 218 else 219 rblockvectorx(1:vectsize,ivec)=rblockvectorx(1:vectsize,ivec)/sqrt2 220 call abi_xcopy(rvectsiz,rblockvectorx(1,ivec),1,vecnm(1,cgindex(ivec)),2) 221 call abi_xcopy(rvectsiz,rblockvectorx(rvectsiz+1,ivec),1,vecnm(2,cgindex(ivec)),2) 222 end if 223 224 if(useoverlap == 1) then 225 call abi_xtrsm('r','u','n','n',vectsize,nvec,one,rgramxbx,nvec,rblockvectorbx,vectsize) 226 if (me_g0 == 1) then 227 call abi_xcopy(1,rblockvectorbx(1,ivec),1,ovl_vecnm(1,gscindex(ivec)),1) 228 ovl_vecnm(2,gscindex(ivec))=zero 229 rblockvectorbx(2:vectsize,ivec)=rblockvectorbx(2:vectsize,ivec)/sqrt2 230 call abi_xcopy(rvectsiz-1,rblockvectorbx(2,ivec),1,ovl_vecnm(1,gscindex(ivec)+1),2) 231 call abi_xcopy(rvectsiz-1,rblockvectorbx(rvectsiz+1,ivec),1,ovl_vecnm(2,gscindex(ivec)+1),2) 232 else 233 rblockvectorbx(1:vectsize,ivec)=rblockvectorbx(1:vectsize,ivec)/sqrt2 234 call abi_xcopy(rvectsiz,rblockvectorbx(1,ivec),1,ovl_vecnm(1,gscindex(ivec)),2) 235 call abi_xcopy(rvectsiz,rblockvectorbx(rvectsiz+1,ivec),1,ovl_vecnm(2,gscindex(ivec)),2) 236 end if 237 end if 238 end do 239 ABI_DEALLOCATE(rgramxbx) 240 ABI_DEALLOCATE(rblockvectorx) 241 ABI_DEALLOCATE(rblockvectorbx) 242 end if 243 244 else if (ortalgo==4) then 245 ! else if (ANY(ortalgo==(/0,2/))) then 246 247 cg_idx = cgindex(1) 248 if (useoverlap==0) then 249 call cgnc_gramschmidt(nelem,nvec,vecnm(1,cg_idx),istwf_k,me_g0,comm) 250 else 251 gsc_idx = gscindex(1) 252 call cgpaw_gramschmidt(nelem,nvec,vecnm(1,cg_idx),ovl_vecnm(1,gsc_idx),istwf_k,me_g0,comm) 253 end if 254 255 else if (ANY(ortalgo==(/0,2/))) then 256 ! ======================= 257 ! Third (old) algorithm 258 ! ======================= 259 260 do ivec=1,nvec 261 ! Normalize each vecnm(n,m) in turn: 262 263 if (useoverlap==1) then ! Using overlap S... 264 if(istwf_k/=2)then 265 sum=zero;ii0=1 266 else 267 if (me_g0 ==1) then 268 sum=half*ovl_vecnm(1,1+nelem*(ivec-1)+igsc)*vecnm(1,1+nelem*(ivec-1)+icg) 269 ii0=2 270 else 271 sum=zero;ii0=1 272 end if 273 end if 274 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:sum) SHARED(icg,ivec,nelem,vecnm) 275 do ii=ii0+nelem*(ivec-1),nelem*ivec 276 sum=sum+vecnm(1,ii+icg)*ovl_vecnm(1,ii+igsc)+vecnm(2,ii+icg)*ovl_vecnm(2,ii+igsc) 277 end do 278 279 else ! Without overlap... 280 if(istwf_k/=2)then 281 sum=zero;ii0=1 282 else 283 if (me_g0 ==1) then 284 sum=half*vecnm(1,1+nelem*(ivec-1)+icg)**2 285 ii0=2 286 else 287 sum=zero;ii0=1 288 end if 289 end if 290 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:sum) SHARED(icg,ivec,nelem,vecnm) 291 do ii=ii0+nelem*(ivec-1)+icg,nelem*ivec+icg 292 sum=sum+vecnm(1,ii)**2+vecnm(2,ii)**2 293 end do 294 end if 295 296 call timab(48,1,tsec) 297 call xmpi_sum(sum,comm,ierr) 298 call timab(48,2,tsec) 299 300 if(istwf_k>=2)sum=two*sum 301 xnorm = sqrt(abs(sum)) ; sum=1.0_dp/xnorm 302 !$OMP PARALLEL DO PRIVATE(ii) SHARED(icg,ivec,nelem,sum,vecnm) 303 do ii=1+nelem*(ivec-1)+icg,nelem*ivec+icg 304 vecnm(1,ii)=vecnm(1,ii)*sum 305 vecnm(2,ii)=vecnm(2,ii)*sum 306 end do 307 if (useoverlap==1) then 308 !$OMP PARALLEL DO PRIVATE(ii) SHARED(icg,ivec,nelem,sum,ovl_vecnm) 309 do ii=1+nelem*(ivec-1)+igsc,nelem*ivec+igsc 310 ovl_vecnm(1,ii)=ovl_vecnm(1,ii)*sum 311 ovl_vecnm(2,ii)=ovl_vecnm(2,ii)*sum 312 end do 313 end if 314 315 ! Remove projection in all higher states. 316 if (ivec<nvec) then 317 318 if(istwf_k==1)then 319 ! Cannot use time-reversal symmetry 320 321 if (useoverlap==1) then ! Using overlap. 322 do ivec2=ivec+1,nvec 323 ! First compute scalar product 324 dotr=zero ; doti=zero 325 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+igsc 326 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:doti,dotr) SHARED(ii1,ii2,nelem,vecnm) 327 do ii=1,nelem 328 dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii) 329 doti=doti+vecnm(1,ii1+ii)*ovl_vecnm(2,ii2+ii)-vecnm(2,ii1+ii)*ovl_vecnm(1,ii2+ii) 330 end do 331 332 call timab(48,1,tsec) 333 buffer2(1)=doti;buffer2(2)=dotr 334 call xmpi_sum(buffer2,comm,ierr) 335 call timab(48,2,tsec) 336 doti=buffer2(1) 337 dotr=buffer2(2) 338 339 ! Then subtract the appropriate amount of the lower state 340 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg 341 #ifdef FC_INTEL 342 ! DIR$ ivdep 343 #endif 344 !$OMP PARALLEL DO PRIVATE(ii) SHARED(doti,dotr,ii1,ii2,nelem,vecnm) 345 do ii=1,nelem 346 vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)+doti*vecnm(2,ii1+ii) 347 vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-doti*vecnm(1,ii1+ii)-dotr*vecnm(2,ii1+ii) 348 end do 349 350 ii1=nelem*(ivec-1)+igsc;ii2=nelem*(ivec2-1)+igsc 351 do ii=1,nelem 352 ovl_vecnm(1,ii2+ii)=ovl_vecnm(1,ii2+ii)& 353 & -dotr*ovl_vecnm(1,ii1+ii)& 354 & +doti*ovl_vecnm(2,ii1+ii) 355 ovl_vecnm(2,ii2+ii)=ovl_vecnm(2,ii2+ii)& 356 -doti*ovl_vecnm(1,ii1+ii)& 357 & -dotr*ovl_vecnm(2,ii1+ii) 358 end do 359 end do 360 else 361 ! ----- No overlap ----- 362 do ivec2=ivec+1,nvec 363 ! First compute scalar product 364 dotr=zero ; doti=zero 365 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg 366 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:doti,dotr) SHARED(ii1,ii2,nelem,vecnm) 367 do ii=1,nelem 368 dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+& 369 & vecnm(2,ii1+ii)*vecnm(2,ii2+ii) 370 doti=doti+vecnm(1,ii1+ii)*vecnm(2,ii2+ii)-& 371 & vecnm(2,ii1+ii)*vecnm(1,ii2+ii) 372 end do 373 ! Init mpi_comm 374 buffer2(1)=doti 375 buffer2(2)=dotr 376 call timab(48,1,tsec) 377 call xmpi_sum(buffer2,comm,ierr) 378 ! call xmpi_sum(doti,spaceComm,ierr) 379 ! call xmpi_sum(dotr,spaceComm,ierr) 380 call timab(48,2,tsec) 381 doti=buffer2(1) 382 dotr=buffer2(2) 383 384 ! Then subtract the appropriate amount of the lower state 385 #ifdef FC_INTEL 386 ! DIR$ ivdep 387 #endif 388 !$OMP PARALLEL DO PRIVATE(ii) SHARED(doti,dotr,ii1,ii2,nelem,vecnm) 389 do ii=1,nelem 390 vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)+& 391 & doti*vecnm(2,ii1+ii) 392 vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-doti*vecnm(1,ii1+ii)-& 393 & dotr*vecnm(2,ii1+ii) 394 end do 395 end do 396 397 end if ! Test on useoverlap 398 399 else if(istwf_k==2)then 400 ! At gamma point use of time-reversal symmetry saves cpu time. 401 402 if (useoverlap==1) then 403 ! ----- Using overlap ----- 404 do ivec2=ivec+1,nvec 405 ! First compute scalar product 406 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+igsc 407 if (me_g0 ==1) then 408 dotr=half*vecnm(1,ii1+1)*ovl_vecnm(1,ii2+1) 409 ! Avoid double counting G=0 contribution 410 ! Imaginary part of vecnm at G=0 should be zero,so only take real part 411 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm) 412 do ii=2,nelem 413 dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+& 414 & vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii) 415 end do 416 else 417 dotr=0._dp 418 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm) 419 do ii=1,nelem 420 dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+& 421 & vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii) 422 end do 423 end if 424 425 dotr=two*dotr 426 427 call timab(48,1,tsec) 428 call xmpi_sum(dotr,comm,ierr) 429 call timab(48,2,tsec) 430 431 ! Then subtract the appropriate amount of the lower state 432 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg 433 #ifdef FC_INTEL 434 ! DIR$ ivdep 435 #endif 436 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm) 437 do ii=1,nelem 438 vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii) 439 vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii) 440 end do 441 ii1=nelem*(ivec-1)+igsc;ii2=nelem*(ivec2-1)+igsc 442 do ii=1,nelem 443 ovl_vecnm(1,ii2+ii)=ovl_vecnm(1,ii2+ii)-dotr*ovl_vecnm(1,ii1+ii) 444 ovl_vecnm(2,ii2+ii)=ovl_vecnm(2,ii2+ii)-dotr*ovl_vecnm(2,ii1+ii) 445 end do 446 end do 447 else 448 ! ----- No overlap ----- 449 do ivec2=ivec+1,nvec 450 ! First compute scalar product 451 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg 452 if (me_g0 ==1) then 453 ! Avoid double counting G=0 contribution 454 ! Imaginary part of vecnm at G=0 should be zero,so only take real part 455 dotr=half*vecnm(1,ii1+1)*vecnm(1,ii2+1) 456 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm) 457 do ii=2,nelem 458 dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*vecnm(2,ii2+ii) 459 end do 460 else 461 dotr=0._dp 462 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm) 463 do ii=1,nelem 464 dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*vecnm(2,ii2+ii) 465 end do 466 end if 467 dotr=two*dotr 468 469 call timab(48,1,tsec) 470 call xmpi_sum(dotr,comm,ierr) 471 call timab(48,2,tsec) 472 473 ! Then subtract the appropriate amount of the lower state 474 #ifdef FC_INTEL 475 ! DIR$ ivdep 476 #endif 477 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm) 478 do ii=1,nelem 479 vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii) 480 vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii) 481 end do 482 end do 483 end if ! Test on useoverlap 484 485 else 486 ! At other special points,use of time-reversal symmetry saves cpu time. 487 488 if (useoverlap==1) then 489 ! ----- Using overlap ----- 490 do ivec2=ivec+1,nvec 491 ! First compute scalar product 492 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+igsc 493 ! Avoid double counting G=0 contribution 494 ! Imaginary part of vecnm at G=0 should be zero,so only take real part 495 dotr=zero 496 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm) 497 do ii=1,nelem 498 dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii) 499 end do 500 dotr=two*dotr 501 502 call timab(48,1,tsec) 503 call xmpi_sum(dotr,comm,ierr) 504 call timab(48,2,tsec) 505 506 ! Then subtract the appropriate amount of the lower state 507 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg 508 #ifdef FC_INTEL 509 ! DIR$ ivdep 510 #endif 511 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm) 512 do ii=1,nelem 513 vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii) 514 vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii) 515 end do 516 ii1=nelem*(ivec-1)+igsc;ii2=nelem*(ivec2-1)+igsc 517 do ii=1,nelem 518 ovl_vecnm(1,ii2+ii)=ovl_vecnm(1,ii2+ii)-dotr*ovl_vecnm(1,ii1+ii) 519 ovl_vecnm(2,ii2+ii)=ovl_vecnm(2,ii2+ii)-dotr*ovl_vecnm(2,ii1+ii) 520 end do 521 end do 522 else 523 ! ----- No overlap ----- 524 do ivec2=ivec+1,nvec 525 ! First compute scalar product 526 ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg 527 ! Avoid double counting G=0 contribution 528 ! Imaginary part of vecnm at G=0 should be zero,so only take real part 529 dotr=zero 530 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm) 531 do ii=1,nelem 532 dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*vecnm(2,ii2+ii) 533 end do 534 dotr=two*dotr 535 536 call timab(48,1,tsec) 537 call xmpi_sum(dotr,comm,ierr) 538 call timab(48,2,tsec) 539 540 ! Then subtract the appropriate amount of the lower state 541 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm) 542 do ii=1,nelem 543 vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii) 544 vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii) 545 end do 546 end do 547 end if 548 549 ! End use of time-reversal symmetry 550 end if 551 552 end if ! Test on "ivec" 553 554 ! end loop over vectors (or bands) with index ivec : 555 end do 556 557 else 558 write(msg,'(a,i0)')"Wrong value for ortalgo: ",ortalgo 559 MSG_ERROR(msg) 560 end if ! End of the second algorithm 561 562 end subroutine pw_orthon