![]() |
Millepede-II
V04-00-00
|
00001 00012 00014 MODULE mpbits 00015 USE mpdef 00016 IMPLICIT NONE 00017 00018 INTEGER(KIND=large) :: ndimb !< dimension for bit (field) array 00019 INTEGER :: n !< matrix size 00020 INTEGER :: ibfw !< bit field width 00021 INTEGER :: mxcnt !< max value for bit field counters 00022 INTEGER :: nencdm !< max value for column counter 00023 INTEGER :: nencdb !< number of bits for encoding column counter 00024 INTEGER :: nthrd !< number of threads 00025 INTEGER, DIMENSION(:), ALLOCATABLE :: bitFieldCounters !< fit field counters for global parameters pairs 00026 00027 END MODULE mpbits 00028 00035 SUBROUTINE inbits(im,jm,inc) ! include element (I,J) 00036 USE mpbits 00037 00038 INTEGER, INTENT(IN) :: im 00039 INTEGER, INTENT(IN) :: jm 00040 INTEGER, INTENT(IN) :: inc 00041 00042 INTEGER(KIND=large) :: l 00043 INTEGER(KIND=large) :: ll 00044 INTEGER :: i 00045 INTEGER :: j 00046 INTEGER :: noffj 00047 INTEGER :: m 00048 INTEGER :: mm 00049 INTEGER :: icount 00050 INTEGER :: ib 00051 INTEGER :: jcount 00052 INTEGER*8 :: noffi 00053 LOGICAL :: btest 00054 00055 IF(im == jm) RETURN ! diagonal 00056 j=MIN(im,jm) 00057 i=MAX(im,jm) 00058 IF(j <= 0) RETURN ! out low 00059 IF(i > n) RETURN ! out high 00060 noffi=int8(i-1)*int8(i-2)*int8(ibfw)/2 ! for J=1 00061 noffj=(j-1)*ibfw 00062 l=noffi/32+i+noffj/32 ! row offset + column offset 00063 ! add I instead of 1 to keep bit maps of different rows in different words (openMP !) 00064 m=MOD(noffj,32) 00065 IF (ibfw <= 1) THEN 00066 bitFieldCounters(l)=ibset(bitFieldCounters(l),m) 00067 ELSE 00068 ! get counter from bit field 00069 ll=l 00070 mm=m 00071 icount=0 00072 DO ib=0,ibfw-1 00073 IF (btest(bitFieldCounters(ll),mm)) icount=ibset(icount,ib) 00074 mm=mm+1 00075 IF (mm >= 32) THEN 00076 ll=ll+1 00077 mm=mm-32 00078 END IF 00079 END DO 00080 ! increment 00081 jcount=icount 00082 icount=MIN(icount+inc,mxcnt) 00083 ! store counter into bit field 00084 IF (icount /= jcount) THEN 00085 ll=l 00086 mm=m 00087 DO ib=0,ibfw-1 00088 IF (btest(icount,ib)) THEN 00089 bitFieldCounters(ll)=ibset(bitFieldCounters(ll),mm) 00090 ELSE 00091 bitFieldCounters(ll)=ibclr(bitFieldCounters(ll),mm) 00092 END IF 00093 mm=mm+1 00094 IF (mm >= 32) THEN 00095 ll=ll+1 00096 mm=mm-32 00097 END IF 00098 END DO 00099 END IF 00100 END IF 00101 RETURN 00102 00103 END SUBROUTINE inbits 00104 00112 SUBROUTINE clbits(in,idimb,iencdb,jbfw) 00113 USE mpbits 00114 USE mpdalc 00115 00116 INTEGER, INTENT(IN) :: in 00117 INTEGER(KIND=large), INTENT(OUT) :: idimb 00118 INTEGER, INTENT(OUT) :: iencdb 00119 INTEGER, INTENT(IN) :: jbfw 00120 00121 INTEGER*8 :: noffd 00122 INTEGER :: i 00123 INTEGER :: mb 00124 INTEGER :: nbcol 00125 !$ INTEGER :: OMP_GET_MAX_THREADS 00126 00127 n=in 00128 ibfw=jbfw 00129 mxcnt=2**ibfw-1 00130 noffd=int8(n)*int8(n-1)*int8(ibfw)/2 00131 ndimb=noffd/32+n 00132 idimb=ndimb 00133 mb=INT(4.0E-6*FLOAT(ndimb)) 00134 WRITE(*,*) ' ' 00135 WRITE(*,*) 'CLBITS: symmetric matrix of dimension',n 00136 WRITE(*,*) 'CLBITS: off-diagonal elements',noffd 00137 IF (mb > 0) THEN 00138 WRITE(*,*) 'CLBITS: dimension of bit-array',ndimb , '(',mb,'MB)' 00139 ELSE 00140 WRITE(*,*) 'CLBITS: dimension of bit-array',ndimb , '(< 1 MB)' 00141 END IF 00142 CALL mpalloc(bitFieldCounters,ndimb,'INBITS: bit storage') 00143 bitFieldCounters=0 00144 ! encoding for compression 00145 nbcol=16 ! 16 bits for column number, 16 bits for column counter 00146 DO i=16,30 00147 IF (btest(n,i)) nbcol=i+1 ! more bits for column number 00148 END DO 00149 nencdb=32-nbcol 00150 iencdb=nencdb 00151 nencdm=ishft(1,nencdb)-1 00152 nthrd=1 00153 !$ NTHRD=OMP_GET_MAX_THREADS() 00154 RETURN 00155 END SUBROUTINE clbits 00156 00167 SUBROUTINE ndbits(ndims,ncmprs,nsparr,mnpair,ihst,jcmprs) 00168 USE mpbits 00169 00170 INTEGER(KIND=large), DIMENSION(4), INTENT(OUT) :: ndims 00171 INTEGER, DIMENSION(:), INTENT(OUT) :: ncmprs 00172 INTEGER(KIND=large), DIMENSION(:,:), INTENT(OUT) :: nsparr 00173 INTEGER, INTENT(IN) :: mnpair 00174 INTEGER, INTENT(IN) :: ihst 00175 INTEGER, INTENT(IN) :: jcmprs 00176 00177 INTEGER :: nwcp(0:1) 00178 INTEGER :: irgn(2) 00179 INTEGER :: inr(2) 00180 INTEGER :: ichunk 00181 INTEGER :: i 00182 INTEGER :: j 00183 INTEGER :: jb 00184 INTEGER :: m 00185 INTEGER :: last 00186 INTEGER :: lrgn 00187 INTEGER :: next 00188 INTEGER :: icp 00189 INTEGER :: kbfw 00190 INTEGER :: mm 00191 INTEGER :: jp 00192 INTEGER :: icmprs 00193 INTEGER :: nj 00194 INTEGER :: ib 00195 INTEGER :: ir 00196 INTEGER :: icount 00197 INTEGER :: iproc 00198 INTEGER :: jbfw 00199 INTEGER :: k 00200 INTEGER :: mb 00201 INTEGER :: n1 00202 INTEGER(KIND=large) :: ll 00203 INTEGER(KIND=large) :: lb 00204 INTEGER(KIND=large) :: nin 00205 INTEGER(KIND=large) :: ntot 00206 INTEGER*8 :: noffi 00207 REAL :: cpr 00208 REAL :: fracu 00209 REAL :: fracz 00210 LOGICAL :: btest 00211 !$ INTEGER :: OMP_GET_THREAD_NUM 00212 ndims(1)=ndimb 00213 ndims(2)=0 00214 ndims(3)=0 00215 ndims(4)=0 00216 ntot=0 00217 icmprs=jcmprs 00218 ! reduce bit field counters to bits 00219 kbfw=1 00220 ll=0 00221 lb=0 00222 ichunk=MIN((n+nthrd-1)/nthrd/32+1,256) 00223 IF (ibfw > 1.OR.icmprs /= 0) THEN 00224 IF (ibfw > 1.AND.icmprs > 0) kbfw=2 ! need to tag single precision entries 00225 jbfw=kbfw ! temp. bit field width 00226 IF (nthrd > 1) jbfw=ibfw ! don't mix rows in bitFieldCounters 00227 ! parallelize row loop 00228 ! private copy of NDIMS,NTOT for each thread, combined at end, init with 0. 00229 !$OMP PARALLEL DO & 00230 !$OMP PRIVATE(I,LL,MM,LB,MB,INR,IRGN,LAST,LRGN, & 00231 !$OMP J,ICOUNT,NEXT,IB,ICP,NWCP,JB,JP,IR) & 00232 !$OMP REDUCTION(+:NDIMS,NTOT) & 00233 !$OMP SCHEDULE(DYNAMIC,ICHUNK) 00234 DO i=1,n 00235 noffi=int8(i-1)*int8(i-2)*int8(ibfw)/2 00236 ll=noffi/32+i 00237 mm=0 00238 noffi=int8(i-1)*int8(i-2)*int8(jbfw)/2 00239 lb=noffi/32+i 00240 mb=0 00241 inr(1)=0 00242 inr(2)=0 00243 irgn(1)=0 00244 irgn(2)=0 00245 last=0 00246 lrgn=0 00247 iproc=0 00248 !$ IPROC=OMP_GET_THREAD_NUM() ! thread number 00249 00250 DO j=1,i-1 00251 00252 icount=0 00253 next=0 00254 DO ib=0,ibfw-1 00255 IF (btest(bitFieldCounters(ll),mm)) icount=ibset(icount,ib) 00256 mm=mm+1 00257 IF (mm >= 32) THEN 00258 ll=ll+1 00259 mm=mm-32 00260 END IF 00261 END DO 00262 DO jb=0,kbfw-1 00263 bitFieldCounters(lb)=ibclr(bitFieldCounters(lb),mb+jb) 00264 END DO 00265 00266 IF (icount > 0) THEN 00267 ntot=ntot+1 00268 IF (iproc == 0.AND.ihst > 0) CALL hmpent(ihst,FLOAT(icount)) 00269 END IF 00270 ! keep pair ? 00271 IF (icount >= mnpair) THEN 00272 next=1 ! double 00273 IF (icount <= icmprs.AND.icmprs > 0) next=2 ! single 00274 inr(next)=inr(next)+1 00275 bitFieldCounters(lb)=ibset(bitFieldCounters(lb),mb+next-1) 00276 IF (next /= last.OR.lrgn >= nencdm) THEN 00277 irgn(next)=irgn(next)+1 00278 lrgn=0 00279 END IF 00280 lrgn=lrgn+1 00281 END IF 00282 mb=mb+kbfw 00283 IF (mb >= 32) THEN 00284 lb=lb+1 00285 mb=mb-32 00286 END IF 00287 last=next 00288 END DO 00289 00290 DO jp=1,kbfw 00291 icp=0 00292 nwcp(0)=inr(jp) ! list of column indices (default) 00293 IF (inr(jp) > 0) THEN 00294 nwcp(1)=irgn(jp)+(irgn(jp)+7)/8 ! list of regions of consecutive columns 00295 ! compress row ? 00296 IF (nwcp(1) < nwcp(0).AND.icmprs /= 0) THEN 00297 icp=1 00298 ncmprs(i+n*(jp-1))=irgn(jp) 00299 END IF 00300 ndims(2) =ndims(2) +nwcp(icp) 00301 ndims(jp+2)=ndims(jp+2)+nwcp(0) 00302 END IF 00303 ir=i+(n+1)*(jp-1) 00304 nsparr(1,ir+1)=nwcp(icp) 00305 nsparr(2,ir+1)=nwcp(0) 00306 END DO 00307 00308 END DO 00309 00310 !$OMP END PARALLEL DO 00311 ! sum up, fill row offsets 00312 lb=1 00313 n1=0 00314 ll=n+1 00315 DO jp=1,kbfw 00316 DO i=1,n 00317 n1=n1+1 00318 nsparr(1,n1)=lb 00319 nsparr(2,n1)=ll 00320 lb=lb+nsparr(1,n1+1) 00321 ll=ll+nsparr(2,n1+1) 00322 END DO 00323 n1=n1+1 00324 nsparr(1,n1)=lb 00325 nsparr(2,n1)=ll 00326 ll=1 00327 END DO 00328 00329 IF (jbfw /= kbfw) THEN ! move bit fields 00330 DO i=1,n 00331 noffi=int8(i-1)*int8(i-2)*int8(jbfw)/2 00332 ll=noffi/32+i 00333 noffi=int8(i-1)*int8(i-2)*int8(kbfw)/2 00334 lb=noffi/32+i 00335 nj=((i-1)*kbfw)/32 00336 DO k=0,nj 00337 bitFieldCounters(lb+k)=bitFieldCounters(ll+k) 00338 END DO 00339 END DO 00340 END IF 00341 00342 ibfw=kbfw 00343 noffi=int8(n)*int8(n-1)*int8(ibfw)/2 00344 ndimb=noffi/32+n 00345 ndims(1)=ndimb 00346 00347 ELSE 00348 00349 nin=0 00350 nsparr(1,1)=1 00351 nsparr(2,1)=n+1 00352 n1=1 00353 DO i=1,n 00354 noffi=int8(i-1)*int8(i-2)/2 00355 ll=noffi/32+i 00356 nj=((i-1)*kbfw)/32 00357 DO k=0,nj 00358 DO m=0,31 00359 IF(btest(bitFieldCounters(ll+k),m)) nin=nin+1 00360 END DO 00361 END DO 00362 n1=n1+1 00363 nsparr(1,n1)=nsparr(1,1)+nin 00364 nsparr(2,n1)=nsparr(2,1)+nin 00365 END DO 00366 ndims(2)=nin 00367 ndims(3)=nin 00368 ntot=nin 00369 00370 END IF 00371 00372 nin=ndims(3)+ndims(4) 00373 fracz=200.0*FLOAT(ntot)/FLOAT(n)/FLOAT(n-1) 00374 fracu=200.0*FLOAT(nin)/FLOAT(n)/FLOAT(n-1) 00375 WRITE(*,*) ' ' 00376 WRITE(*,*) 'NDBITS: number of diagonal elements',n 00377 WRITE(*,*) 'NDBITS: number of used off-diagonal elements',nin 00378 WRITE(*,1000) 'fraction of non-zero off-diagonal elements', fracz 00379 WRITE(*,1000) 'fraction of used off-diagonal elements', fracu 00380 IF (icmprs /= 0) THEN 00381 cpr=100.0*FLOAT(ndims(2)+2*ndims(3)+ndims(4))/FLOAT(3*nin) 00382 WRITE(*,1000) 'compression ratio for off-diagonal elements', cpr 00383 END IF 00384 1000 FORMAT(' NDBITS: ',a,f6.2,' %') 00385 RETURN 00386 END SUBROUTINE ndbits 00387 00395 SUBROUTINE ckbits(ndims,mnpair,jcmprs) 00396 USE mpbits 00397 00398 INTEGER(KIND=large), DIMENSION(4), INTENT(OUT) :: ndims 00399 INTEGER, INTENT(IN) :: mnpair 00400 INTEGER, INTENT(IN) :: jcmprs 00401 00402 INTEGER :: nwcp(0:1) 00403 INTEGER :: irgn(2) 00404 INTEGER :: inr(2) 00405 INTEGER(KIND=large) :: ll 00406 INTEGER*8 :: noffi 00407 INTEGER :: i 00408 INTEGER :: j 00409 INTEGER :: last 00410 INTEGER :: lrgn 00411 INTEGER :: next 00412 INTEGER :: icp 00413 INTEGER :: ib 00414 INTEGER :: icount 00415 INTEGER :: icmprs 00416 INTEGER :: kbfw 00417 INTEGER :: jp 00418 INTEGER :: mm 00419 LOGICAL :: btest 00420 00421 DO i=1,4 00422 ndims(i)=0 00423 END DO 00424 icmprs=jcmprs 00425 kbfw=1 00426 IF (ibfw > 1.AND.icmprs > 0) kbfw=2 00427 ll=0 00428 00429 DO i=1,n 00430 noffi=int8(i-1)*int8(i-2)*int8(ibfw)/2 00431 ll=noffi/32+i 00432 mm=0 00433 inr(1)=0 00434 inr(2)=0 00435 irgn(1)=0 00436 irgn(2)=0 00437 last=0 00438 lrgn=0 00439 DO j=1,i-1 00440 icount=0 00441 next=0 00442 DO ib=0,ibfw-1 00443 IF (btest(bitFieldCounters(ll),mm)) icount=ibset(icount,ib) 00444 mm=mm+1 00445 IF (mm >= 32) THEN 00446 ll=ll+1 00447 mm=mm-32 00448 END IF 00449 END DO 00450 00451 IF (icount > 0) ndims(1)=ndims(1)+1 00452 ! keep pair ? 00453 IF (icount >= mnpair) THEN 00454 next=1 ! double 00455 IF (icount <= icmprs.AND.icmprs > 0) next=2 ! single 00456 inr(next)=inr(next)+1 00457 IF (next /= last.OR.lrgn >= nencdm) THEN 00458 irgn(next)=irgn(next)+1 00459 lrgn=0 00460 END IF 00461 lrgn=lrgn+1 00462 END IF 00463 last=next 00464 END DO 00465 00466 IF (icmprs /= 0) THEN 00467 DO jp=1,kbfw 00468 IF (inr(jp) > 0) THEN 00469 icp=0 00470 nwcp(0)=inr(jp) ! list of column indices (default) 00471 nwcp(1)=irgn(jp)+(irgn(jp)+7)/8 ! list of regions of consecutive columns 00472 ! compress row ? 00473 IF (nwcp(1) < nwcp(0)) icp=1 00474 ndims(2) =ndims(2) +nwcp(icp) 00475 ndims(jp+2)=ndims(jp+2)+nwcp(0) 00476 END IF 00477 END DO 00478 ELSE 00479 ndims(2)=ndims(2)+inr(1) 00480 ndims(3)=ndims(3)+inr(1) 00481 END IF 00482 00483 END DO 00484 00485 RETURN 00486 END SUBROUTINE ckbits 00487 00494 SUBROUTINE spbits(nsparr,nsparc,ncmprs) ! collect elements 00495 USE mpbits 00496 USE mpdalc 00497 00498 INTEGER(KIND=large), DIMENSION(:,:), INTENT(IN) :: nsparr 00499 INTEGER, DIMENSION(:), INTENT(OUT) :: nsparc 00500 INTEGER, DIMENSION(:), INTENT(IN) :: ncmprs 00501 00502 INTEGER(KIND=large) :: kl 00503 INTEGER(KIND=large) :: l 00504 INTEGER(KIND=large) :: ll 00505 INTEGER(KIND=large) :: l1 00506 INTEGER(KIND=large) :: k8 00507 INTEGER(KIND=large) :: n1 00508 INTEGER*8 :: noffi 00509 INTEGER :: i 00510 INTEGER :: j 00511 INTEGER :: j1 00512 INTEGER :: jb 00513 INTEGER :: jn 00514 INTEGER :: m 00515 INTEGER :: ichunk 00516 INTEGER :: next 00517 INTEGER :: last 00518 INTEGER :: lrgn 00519 INTEGER :: nrgn 00520 INTEGER :: nrgn8 00521 LOGICAL :: btest 00522 00523 ichunk=MIN((n+nthrd-1)/nthrd/32+1,256) 00524 00525 DO jb=0,ibfw-1 00526 ! parallelize row loop 00527 !$OMP PARALLEL DO & 00528 !$OMP PRIVATE(I,N1,NOFFI,L,M,KL,L1,NRGN,NRGN8,K8, & 00529 !$OMP LAST,LRGN,LL,J1,JN,J,NEXT) & 00530 !$OMP SCHEDULE(DYNAMIC,ICHUNK) 00531 DO i=1,n 00532 n1=i+jb*(n+1) 00533 noffi=int8(i-1)*int8(i-2)*int8(ibfw)/2 00534 l=noffi/32+i 00535 m=jb 00536 kl=nsparr(1,n1)-1 ! pointer to row in NSPARC 00537 l1=nsparr(2,n1) ! pointer to row in sparse matrix 00538 nrgn=ncmprs(i+n*jb)! compression (number of consecutive regions) 00539 nrgn8=(nrgn+7)/8 ! number of groups (1 offset per group) 00540 k8=kl 00541 kl=kl+nrgn8 ! reserve space of offsets 00542 last=0 00543 lrgn=0 00544 ll=l1-1 00545 j1=0 00546 jn=0 00547 00548 DO j=1,i-1 ! loop for off-diagonal elements 00549 next=0 00550 IF(bitFieldCounters(l) /= 0) THEN 00551 IF(btest(bitFieldCounters(l),m)) THEN 00552 ll=ll+1 00553 IF (nrgn <= 0) THEN 00554 kl=kl+1 00555 nsparc(kl)=j ! column index 00556 ELSE 00557 next=1 00558 IF (last == 0.OR.jn >= nencdm) THEN 00559 IF (MOD(lrgn,8) == 0) THEN 00560 k8=k8+1 00561 nsparc(k8)=INT(ll-l1) 00562 END IF 00563 lrgn=lrgn+1 00564 kl=kl+1 00565 j1=ishft(j,nencdb) 00566 jn=0 00567 END IF 00568 jn=jn+1 00569 nsparc(kl)=ior(j1,jn) 00570 END IF 00571 END IF 00572 END IF 00573 last=next 00574 m=m+ibfw 00575 IF (m >= 32) THEN 00576 m=m-32 00577 l=l+1 00578 END IF 00579 00580 END DO 00581 END DO 00582 !$OMP END PARALLEL DO 00583 00584 END DO 00585 00586 n1=(n+1)*ibfw 00587 WRITE(*,*) ' ' 00588 WRITE(*,*) 'SPBITS: sparse structure constructed ',nsparr(1,n1), ' words' 00589 WRITE(*,*) 'SPBITS: dimension parameter of matrix',nsparr(2,1)-1 00590 IF (ibfw <= 1) THEN 00591 WRITE(*,*) 'SPBITS: index of last used location',nsparr(2,n1)-1 00592 ELSE 00593 WRITE(*,*) 'SPBITS: index of last used double',nsparr(2,n1/2)-1 00594 WRITE(*,*) 'SPBITS: index of last used single',nsparr(2,n1)-1 00595 END IF 00596 CALL mpdealloc(bitFieldCounters) 00597 RETURN 00598 END SUBROUTINE spbits 00599 00600