Millepede-II V04-16-01
mpbits.f90
Go to the documentation of this file.
1
38
40MODULE mpbits
41 USE mpdef
42 IMPLICIT NONE
43
44 INTEGER(mpl) :: ndimb
45 INTEGER(mpl) :: ndimb2
46 INTEGER(mpi) :: n
47 INTEGER(mpi) :: nar
48 INTEGER(mpi) :: nac
49 INTEGER(mpi) :: n2
50 INTEGER(mpi) :: ibfw
51 INTEGER(mpi) :: ireqpe
52 INTEGER(mpi) :: isngpe
53 INTEGER(mpi) :: iextnd
54 INTEGER(mpi) :: nspc
55 INTEGER(mpi) :: mxcnt
56 INTEGER(mpi) :: nthrd
57 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: bitfieldcounters
58 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: bitmap
59 INTEGER(mpi), PARAMETER :: bs = bit_size(1_mpi)
60
61END MODULE mpbits
62
69SUBROUTINE inbits(im,jm,inc) ! include element (I,J)
70 USE mpbits
71 IMPLICIT NONE
72
73 INTEGER(mpi), INTENT(IN) :: im
74 INTEGER(mpi), INTENT(IN) :: jm
75 INTEGER(mpi), INTENT(IN) :: inc
76
77 INTEGER(mpl) :: l
78 INTEGER(mpi) :: i
79 INTEGER(mpi) :: j
80 INTEGER(mpi) :: noffj
81 INTEGER(mpi) :: nout
82 INTEGER(mpi) :: m
83 INTEGER(mpi) :: icount
84 INTEGER(mpi) :: jcount
85 INTEGER(mpl) :: noffi
86
87 ! diagonal included now !
88 !IF(im == jm) RETURN ! diagonal
89 j=min(im,jm)
90 i=max(im,jm)
91 IF(j <= 0) RETURN ! out low
92 IF(i > n) RETURN ! out high
93 IF(iextnd >= 0) THEN
94 ! lower triangle, row 'i', col 'j'
95 noffi=int(i-1,mpl)*int(i,mpl)*int(ibfw,mpl)/2 ! row offset for J=1
96 noffj=(j-1)*ibfw
97 l=noffi/bs+i+noffj/bs ! row offset + column offset
98 ! add I instead of 1 to keep bit maps of different rows in different words (openMP !)
99 ELSE
100 ! upper triangle, row 'j', col 'i'
101 noffi=int(j-1,mpl)*int(2*n+2-j,mpl)/2 ! row offset for I=J
102 noffj=i-j
103 l=noffi/bs+j+noffj/bs ! row offset + column offset
104 ! add J instead of 1 to keep bit maps of different rows in different words (openMP !)
105 ENDIF
106 m=mod(noffj,bs)
107 IF (ibfw <= 1) THEN
109 ELSE
110 ! get counter from bit field
111 icount=0
112 nout=m+ibfw-bs ! number of bits outside word
113 IF (nout <= 0) THEN
114 ! inside single word
115 CALL mvbits(bitfieldcounters(l),m,ibfw,icount,0)
116 ELSE
117 ! spread over two words
118 CALL mvbits(bitfieldcounters(l),m,ibfw-nout,icount,0)
119 CALL mvbits(bitfieldcounters(l+1),0,nout,icount,ibfw-nout)
120 ENDIF
121 ! increment
122 jcount=icount
123 icount=min(icount+inc,mxcnt)
124 ! store counter into bit field
125 IF (icount /= jcount) THEN
126 IF (nout <= 0) THEN
127 ! inside single word
128 CALL mvbits(icount,0,ibfw,bitfieldcounters(l),m)
129 ELSE
130 ! spread over two words
131 CALL mvbits(icount,0,ibfw-nout,bitfieldcounters(l),m)
132 CALL mvbits(icount,ibfw-nout,nout,bitfieldcounters(l+1),0)
133 ENDIF
134 END IF
135 END IF
136 RETURN
137
138END SUBROUTINE inbits
139
145SUBROUTINE irbits(i,j) ! include element (I,J)
146 USE mpbits
147 IMPLICIT NONE
148
149 INTEGER(mpi), INTENT(IN) :: i
150 INTEGER(mpi), INTENT(IN) :: j
151
152 INTEGER(mpl) :: l
153 INTEGER(mpi) :: noffj
154 INTEGER(mpi) :: m
155 INTEGER(mpl) :: noffi
156
157 IF(i > nar.OR.j > nac) RETURN ! out high
158 ! upper triangle, rectangular part
159 noffi=ndimb+int(i-1,mpl)*int(nac/bs+1,mpl)+1 ! row offset for J=1
160 noffj=j-1
161 l=noffi+noffj/bs ! row offset + column offset
162 m=mod(noffj,bs)
164 RETURN
165
166END SUBROUTINE irbits
167
178SUBROUTINE clbits(in,jreqpe,jhispe,jsngpe,jextnd,idimb,ispc)
179 USE mpbits
180 USE mpdalc
181 IMPLICIT NONE
182
183 INTEGER(mpi), INTENT(IN) :: in
184 INTEGER(mpi), INTENT(IN) :: jreqpe
185 INTEGER(mpi), INTENT(IN) :: jhispe
186 INTEGER(mpi), INTENT(IN) :: jsngpe
187 INTEGER(mpi), INTENT(IN) :: jextnd
188 INTEGER(mpl), INTENT(OUT) :: idimb
189 INTEGER(mpi), INTENT(OUT) :: ispc
190
191 INTEGER(mpl) :: noffd
192 INTEGER(mpi) :: i
193 INTEGER(mpi) :: icount
194 INTEGER(mpi) :: mb
195 !$ INTEGER(mpi) :: OMP_GET_MAX_THREADS
196 ! save input parameter
197 n=in
198 nar=0
199 nac=0
200 ireqpe=jreqpe
201 isngpe=jsngpe
202 iextnd=max(0,jextnd)
203 ! number of precision types (D, F)
204 ispc=1
205 ! bit field size
206 ibfw=1 ! number of bits needed to count up to ICOUNT
207 mxcnt=1
208 IF (jextnd >= 0) THEN
209 ! optional larger bit fields for lower triangle
210 icount=max(jsngpe+1,jhispe)
211 icount=max(jreqpe,icount)
212 DO i=1,30
213 IF (icount > mxcnt) THEN
214 ibfw=ibfw+1
215 mxcnt=mxcnt*2+1
216 END IF
217 END DO
218 IF (jsngpe>0) ispc=2
219 END IF
220 ! bit field array size
221 noffd=int(n,mpl)*int(n+1,mpl)*int(ibfw,mpl)/2
222 ndimb=noffd/bs+n
223 idimb=ndimb
224 nspc=ispc
225 mb=int(4.0e-6*real(ndimb,mps),mpi)
226 WRITE(*,*) ' '
227 WRITE(*,*) 'CLBITS: symmetric matrix of dimension',n
228 WRITE(*,*) 'CLBITS: off-diagonal elements',noffd
229 IF (mb > 0) THEN
230 WRITE(*,*) 'CLBITS: dimension of bit-array',ndimb , '(',mb,'MB)'
231 ELSE
232 WRITE(*,*) 'CLBITS: dimension of bit-array',ndimb , '(< 1 MB)'
233 END IF
234 CALL mpalloc(bitfieldcounters,ndimb,'INBITS: bit storage')
236 nthrd=1
237 !$ NTHRD=OMP_GET_MAX_THREADS()
238 RETURN
239END SUBROUTINE clbits
240
251SUBROUTINE plbits(in,inar,inac,idimb)
252 USE mpbits
253 USE mpdalc
254 IMPLICIT NONE
255
256 INTEGER(mpi), INTENT(IN) :: in
257 INTEGER(mpi), INTENT(IN) :: inar
258 INTEGER(mpi), INTENT(IN) :: inac
259 INTEGER(mpl), INTENT(OUT) :: idimb
260
261 INTEGER(mpl) :: noffd
262 INTEGER(mpi) :: mb
263 !$ INTEGER(mpi) :: OMP_GET_MAX_THREADS
264 ! save input parameter
265 n=in
266 nar=inar
267 nac=inac
268 ireqpe=1
269 isngpe=0
270 iextnd=-1 ! upper triangle
271 ibfw=1
272 ! bit field array size
273 noffd=int(n,mpl)*int(n+1,mpl)/2
274 ndimb=noffd/bs+n
275 idimb=ndimb+int(nar,mpl)*int(nac/bs+1,mpl)
276 nspc=1
277 mb=int(4.0e-6*real(idimb,mps),mpi)
278 WRITE(*,*) ' '
279 WRITE(*,*) 'PLBITS: symmetric matrix of dimension',n, '(',nar, nac,')'
280 WRITE(*,*) 'PLBITS: off-diagonal elements',noffd, '(',int(nar,mpl)*int(nac,mpl),')'
281 IF (mb > 0) THEN
282 WRITE(*,*) 'PLBITS: dimension of bit-array',idimb , '(',mb,'MB)'
283 ELSE
284 WRITE(*,*) 'PLBITS: dimension of bit-array',idimb , '(< 1 MB)'
285 END IF
286 CALL mpalloc(bitfieldcounters,idimb,'INBITS: bit storage')
288 nthrd=1
289 !$ NTHRD=OMP_GET_MAX_THREADS()
290 RETURN
291END SUBROUTINE plbits
292
301SUBROUTINE ndbits(npgrp,ndims,nsparr,ihst)
302 USE mpbits
303 USE mpdalc
304 IMPLICIT NONE
305
306 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
307 INTEGER(mpl), DIMENSION(4), INTENT(OUT) :: ndims
308 INTEGER(mpl), DIMENSION(:,:), INTENT(OUT) :: nsparr
309 INTEGER(mpi), INTENT(IN) :: ihst
310
311 INTEGER(mpi) :: nwcp(0:1)
312 INTEGER(mpi) :: irgn(2)
313 INTEGER(mpi) :: inr(2)
314 INTEGER(mpi) :: ichunk
315 INTEGER(mpi) :: i
316 INTEGER(mpi) :: j
317 INTEGER(mpi) :: jcol
318 INTEGER(mpi) :: last
319 INTEGER(mpi) :: lrgn
320 INTEGER(mpi) :: next
321 INTEGER(mpi) :: icp
322 INTEGER(mpi) :: mm
323 INTEGER(mpi) :: jp
324 INTEGER(mpi) :: n1
325 INTEGER(mpi) :: nd
326 INTEGER(mpi) :: ib
327 INTEGER(mpi) :: ir
328 INTEGER(mpi) :: icount
329 INTEGER(mpi) :: iproc
330 INTEGER(mpi) :: iword
331 INTEGER(mpi) :: mb
332 INTEGER(mpl) :: ll
333 INTEGER(mpl) :: lb
334 INTEGER(mpl) :: nin
335 INTEGER(mpl) :: npar
336 INTEGER(mpl) :: ntot
337 INTEGER(mpl) :: nskyln
338 INTEGER(mpl) :: noffi
339 INTEGER(mpl) :: noffj
340 REAL(mps) :: cpr
341 REAL(mps) :: fracu
342 REAL(mps) :: fracs
343 REAL(mps) :: fracz
344 LOGICAL :: btest
345 !$ INTEGER(mpi) :: OMP_GET_THREAD_NUM
346 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: lastRowInCol
347
348 ll=int(n,mpl)*int(nthrd,mpl)
349 CALL mpalloc(lastrowincol,ll,'NDBITS: last (non zero) row in col.')
350 lastrowincol=0
351
352 nd=npgrp(n+1)-npgrp(1) ! number of diagonal elements
353
354 ndims(1)=ndimb
355 ndims(2)=0
356 ndims(3)=0
357 ndims(4)=0
358 ntot=0
359 ll=0
360 lb=0
361 ichunk=min((n+nthrd-1)/nthrd/32+1,256)
362 ! reduce bit field counters to (precision type) bits, analyze precision type bit fields ('1st half' (j<=i))
363 ! parallelize row loop
364 ! private copy of NTOT for each thread, combined at end, init with 0.
365 !$OMP PARALLEL DO &
366 !$OMP PRIVATE(I,NOFFI,LL,MM,LB,MB,IWORD,IPROC,J,ICOUNT,IB,INR,IRGN,LAST,LRGN,NEXT,JP,IR,NPAR) &
367 !$OMP REDUCTION(+:NTOT) &
368 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
369 DO i=1,n
370 noffi=int(i-1,mpl)*int(i,mpl)*int(ibfw,mpl)/2
371 ll=noffi/bs+i
372 mm=0
373 lb=ll
374 mb=0
375 iword=0 ! reset temporary bit fields
376 iproc=0
377 !$ IPROC=OMP_GET_THREAD_NUM() ! thread number
378 inr(1)=0
379 inr(2)=0
380 irgn(1)=1 ! 'end marker' region
381 irgn(2)=1
382 last=0
383 lrgn=0
384 npar=0
385
386 DO j=1,i ! loop until diagonal element
387 ! get (pair) counter
388 icount=0
389 next=0
390 DO ib=0,ibfw-1
391 IF (btest(bitfieldcounters(ll),mm)) icount=ibset(icount,ib)
392 mm=mm+1
393 IF (mm >= bs) THEN
394 ll=ll+1
395 mm=mm-bs
396 END IF
397 END DO
398
399 IF (icount > 0) THEN
400 npar=npar+npgrp(j+1)-npgrp(j)
401 IF (iproc == 0.AND.ihst > 0) CALL hmpent(ihst,real(icount,mps))
402 END IF
403
404 ! keep pair ?
405 IF (icount >= ireqpe) THEN
406 next=1 ! double
407 IF (icount <= isngpe) next=2 ! single
408 iword=ibset(iword,mb+next-1)
409 inr(next)=inr(next)+npgrp(j+1)-npgrp(j) ! number of parameters
410 IF (next /= last) THEN
411 irgn(next)=irgn(next)+1
412 END IF
413 jcol=j+iproc*n
414 lastrowincol(jcol)=max(lastrowincol(jcol),i)
415 END IF
416 last=next
417 ! save condensed bitfield
418 mb=mb+nspc
419 IF (mb >= bs) THEN
420 bitfieldcounters(lb)=iword ! store
421 iword=0
422 lb=lb+1
423 mb=mb-bs
424 END IF
425 END DO
426 bitfieldcounters(lb)=iword ! store
427 ntot=ntot+npar*(npgrp(i+1)-npgrp(i))
428
429 ! save row statistics
430 ir=i+1
431 DO jp=1,nspc
432 nsparr(1,ir)=irgn(jp) ! number of regions per row and precision
433 nsparr(2,ir)=inr(jp) ! number of columns per row and precision (groups)
434 ir=ir+n+1
435 END DO
436
437 END DO
438 !$OMP END PARALLEL DO
439
440 ! analyze precision type bit fields for extended storage, check for row compression
441
442 ! parallelize row loop
443 ! private copy of NDIMS for each thread, combined at end, init with 0.
444 !$OMP PARALLEL DO &
445 !$OMP PRIVATE(I,NOFFI,NOFFJ,LL,MM,INR,IRGN,LAST,LRGN,J,NEXT,ICP,NWCP,JP,IR,IB) &
446 !$OMP REDUCTION(+:NDIMS) &
447 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
448 DO i=1,n
449 ! restore row statistics
450 ir=i+1
451 DO jp=1,nspc
452 irgn(jp)=int(nsparr(1,ir),mpi) ! number of regions per row and precision
453 inr(jp)=int(nsparr(2,ir),mpi) ! number of columns per row and precision (groups)
454 ir=ir+n+1
455 END DO
456
457 ! analyze precision type bit fields for extended storage ('2nd half' (j>i) too) ?
458 IF (iextnd > 0) THEN
459
460 noffj=(i-1)*nspc
461 mm=int(mod(noffj,int(bs,mpl)),mpi)
462
463 last=0
464 lrgn=0
465
466 ! remaining columns
467 DO j=i+1, n
468 ! index for pair (J,I)
469 noffi=int(j-1,mpl)*int(j,mpl)*int(ibfw,mpl)/2 ! for I=1
470 ll=noffi/bs+j+noffj/bs ! row offset + column offset
471
472 ! get precision type
473 next=0
474 DO ib=0,nspc-1
475 IF (btest(bitfieldcounters(ll),mm+ib)) next=ibset(next,ib)
476 END DO
477
478 ! keep pair ?
479 IF (next > 0) THEN
480 inr(next)=inr(next)+npgrp(j+1)-npgrp(j) ! number of parameters
481 IF (next /= last) THEN
482 irgn(next)=irgn(next)+1
483 END IF
484 END IF
485 last=next
486 END DO
487 END IF
488
489 ! row statistics, compression
490 ir=i+1
491 DO jp=1,nspc
492 icp=0
493 nwcp(0)=inr(jp) ! list of column indices (default)
494 IF (inr(jp) > 0) THEN
495 nwcp(1)=irgn(jp)*2 ! list of regions (group starts and offsets)
496 ! compress row ?
497 IF ((nwcp(1) < nwcp(0)).OR.iextnd > 0) THEN
498 icp=1
499 END IF
500 ! total space
501 ndims(2) =ndims(2) +nwcp(icp)
502 ndims(jp+2)=ndims(jp+2)+nwcp(0)*(npgrp(i+1)-npgrp(i))
503 END IF
504 ! per row and precision
505 nsparr(1,ir)=nwcp(icp)
506 nsparr(2,ir)=nwcp(0)*(npgrp(i+1)-npgrp(i))
507 ir=ir+n+1
508 END DO
509 END DO
510 !$OMP END PARALLEL DO
511
512 ! sum up, fill row offsets
513 lb=0
514 n1=0
515 ll=nd
516 DO jp=1,nspc
517 DO i=1,n
518 n1=n1+1
519 nsparr(1,n1)=lb
520 nsparr(2,n1)=ll
521 lb=lb+nsparr(1,n1+1)
522 ll=ll+nsparr(2,n1+1)
523 END DO
524 n1=n1+1
525 nsparr(1,n1)=lb
526 nsparr(2,n1)=ll
527 ll=0
528 END DO
529
530 ! look at skyline
531 ! first combine threads
532 ll=n
533 DO j=2,nthrd
534 DO i=1,n
535 ll=ll+1
536 lastrowincol(i)=max(lastrowincol(i),lastrowincol(ll))
537 END DO
538 END DO
539 ! sumup
540 nskyln=0
541 DO i=1,n
542 npar=npgrp(lastrowincol(i)+1)-npgrp(i)
543 nskyln=nskyln+npar*(npgrp(i+1)-npgrp(i))
544 END DO
545 ! cleanup
546 CALL mpdealloc(lastrowincol)
547
548 nin=ndims(3)+ndims(4)
549 fracz=200.0*real(ntot,mps)/real(nd,mps)/real(nd-1,mps)
550 fracu=200.0*real(nin,mps)/real(nd,mps)/real(nd-1,mps)
551 fracs=200.0*real(nskyln,mps)/real(nd,mps)/real(nd+1,mps)
552 WRITE(*,*) ' '
553 WRITE(*,*) 'NDBITS: number of diagonal elements',nd
554 WRITE(*,*) 'NDBITS: number of used off-diagonal elements',nin
555 WRITE(*,1000) 'fraction of non-zero off-diagonal elements', fracz
556 WRITE(*,1000) 'fraction of used off-diagonal elements', fracu
557 cpr=100.0*real(mpi*ndims(2)+mpd*ndims(3)+mps*ndims(4),mps)/real((mpd+mpi)*nin,mps)
558 WRITE(*,1000) 'compression ratio for off-diagonal elements', cpr
559 WRITE(*,1000) 'fraction inside skyline ', fracs
5601000 FORMAT(' NDBITS: ',a,f6.2,' %')
561 RETURN
562END SUBROUTINE ndbits
563
574SUBROUTINE pbsbits(npgrp,ibsize,nnzero,nblock,nbkrow)
575 USE mpbits
576 USE mpdalc
577 IMPLICIT NONE
578
579 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
580 INTEGER(mpi), INTENT(IN) :: ibsize
581 INTEGER(mpl), INTENT(OUT) :: nnzero
582 INTEGER(mpl), INTENT(OUT) :: nblock
583 INTEGER(mpi), DIMENSION(:),INTENT(OUT) :: nbkrow
584
585 INTEGER(mpi) :: ichunk
586 INTEGER(mpi) :: i
587 INTEGER(mpi) :: ib
588 INTEGER(mpi) :: igrpf
589 INTEGER(mpi) :: igrpl
590 INTEGER(mpi) :: iproc
591 INTEGER(mpi) :: ioffb
592 INTEGER(mpi) :: ioffg
593 INTEGER(mpi) :: j
594 INTEGER(mpi) :: mb
595 INTEGER(mpi) :: mbt
596 INTEGER(mpi) :: mm
597 INTEGER(mpi) :: nd
598 INTEGER(mpi) :: ngrp
599 INTEGER(mpi) :: ir
600 INTEGER(mpi) :: irfrst
601 INTEGER(mpi) :: irlast
602 INTEGER(mpi) :: jb
603 INTEGER(mpi) :: jc
604 INTEGER(mpl) :: length
605 INTEGER(mpl) :: ll
606 INTEGER(mpl) :: noffi
607 INTEGER(mpl), PARAMETER :: two=2
608 INTEGER(mpi), DIMENSION(:,:), ALLOCATABLE :: rowBlocksToGroups
609 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: blockCounter
610 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: groupList
611 !$ INTEGER(mpi) :: OMP_GET_THREAD_NUM
612
613 LOGICAL :: btest
614
615 nnzero=0
616 nblock=0
617 nbkrow=0
618
619 !$POMP INST BEGIN(pbsbits)
620 nd=npgrp(n+1)-npgrp(1) ! number of diagonal elements
621 mb=(nd+nac-1)/ibsize+1 ! max. number of blocks per row/column
622 mbt=(nd-1)/ibsize+1 ! max. number of blocks in triangular part
623 length=int(mb,mpl)*int(nthrd,mpl)
624 CALL mpalloc(blockcounter,length,'PBBITS: block counter')
625 length=int(n,mpl)*int(nthrd,mpl)
626 CALL mpalloc(grouplist,length,'PBBITS: group list')
627
628 ! mapping row blocks to parameters groups
629 length=int(mbt,mpl)
630 CALL mpalloc(rowblockstogroups,two,length,'mapping row blocks to par. groups (I)')
631 rowblockstogroups(:,:)=0
632 igrpf=1 ! first group of block
633 igrpl=1 ! last group of block
634 ir=1 ! first row of block
635 DO i=1,mbt
636 DO WHILE (igrpf < n .AND. npgrp(igrpf+1) <= ir)
637 igrpf=igrpf+1
638 END DO
639 rowblockstogroups(1,i)=igrpf
640 ir=ir+ibsize
641 DO WHILE (igrpl < n .AND. npgrp(igrpl+1) < ir)
642 igrpl=igrpl+1
643 END DO
644 rowblockstogroups(2,i)=igrpl
645 END DO
646
647 ll=0
648 ichunk=min((mbt+nthrd-1)/nthrd/32+1,256)
649 ! analyze bit fields ('upper half' (j>=i))
650 ! parallelize row loop
651 !$OMP PARALLEL DO &
652 !$OMP PRIVATE(IPROC,IOFFB,IOFFG,I,NOFFI,LL,MM,NGRP,J,IR,IRFRST,IRLAST) &
653 !$OMP REDUCTION(+:NNZERO,NBLOCK) &
654 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
655 DO ib=1,mbt
656 irfrst=ibsize*(ib-1)+1 ! first row in block
657 irlast=ibsize*ib ! last row in block
658 iproc=0
659 !$ IPROC=OMP_GET_THREAD_NUM() ! thread number
660 ioffb=iproc*mb
661 ioffg=iproc*n
662 blockcounter(ioffb+1:ioffb+mb)=0
663 DO i=rowblockstogroups(1,ib),rowblockstogroups(2,ib)
664 noffi=int(i-1,mpl)*int(2*n+2-i,mpl)/2 ! row offset for I=J
665 ll=noffi/bs+i
666 mm=0
667 ngrp=0
668 DO j=i,n ! loop from diagonal element
669 IF (btest(bitfieldcounters(ll),mm)) THEN ! found group
670 ngrp=ngrp+1
671 grouplist(ioffg+ngrp)=j
672 END IF
673 mm=mm+1
674 IF (mm >= bs) THEN
675 ll=ll+1
676 mm=mm-bs
677 END IF
678 END DO
679 ! analyze rows (in overlap of group 'i 'and block 'ib')
680 DO ir=max(irfrst,npgrp(i)),min(irlast,npgrp(i+1)-1)
681 ! triangular part (parameter groups)
682 DO j=1,ngrp
683 ! column loop
684 DO jc=max(ir,npgrp(grouplist(ioffg+j))),npgrp(grouplist(ioffg+j)+1)-1
685 ! block number
686 jb=(jc-1)/ibsize+1
687 blockcounter(ioffb+jb)=blockcounter(ioffb+jb)+1
688 END DO
689 END DO
690 ! rectangular part (parameters, constraints)
691 noffi=ndimb+int(ir-1,mpl)*int(nac/bs+1,mpl)+1 ! row offset for J=1
692 ll=noffi ! row offset
693 mm=0
694 DO j=1,nac
695 IF (btest(bitfieldcounters(ll),mm)) THEN ! found group
696 ! block number
697 jb=(nd+j-1)/ibsize+1
698 blockcounter(ioffb+jb)=blockcounter(ioffb+jb)+1
699 END IF
700 mm=mm+1
701 IF (mm >= bs) THEN
702 ll=ll+1
703 mm=mm-bs
704 END IF
705 END DO
706 END DO
707 END DO
708 ! end of 'row' block
709 DO j=1,mb
710 IF (blockcounter(ioffb+j) > 0) THEN
711 nnzero=nnzero+blockcounter(ioffb+j)
712 nblock=nblock+1
713 nbkrow(ib)=nbkrow(ib)+1
714 ENDIF
715 END DO
716 END DO
717 !$OMP END PARALLEL DO
718
719 ! 'empty' diagonal elements needed too
720 DO ib=mbt+1,mb
721 nnzero=nnzero+ibsize
722 nblock=nblock+1
723 nbkrow(ib)=1
724 END DO
725
726 ! cleanup
727 CALL mpdealloc(grouplist)
728 CALL mpdealloc(blockcounter)
729 CALL mpdealloc(rowblockstogroups)
730
731 !$POMP INST END(pbsbits)
732 WRITE(*,*) ' '
733 WRITE(*,*) 'PBSBITS: number of used elements', nnzero
734 WRITE(*,1000) 'fraction of used elements', 200.0*real(nnzero,mps)/real(nd+nac,mps)/real(nd+nac+1,mps)
735 WRITE(*,*) 'PBSBITS: block size', ibsize
736 WRITE(*,*) 'PBSBITS: number of (used) blocks', nblock
737 WRITE(*,1000) 'fraction of used storage ', 100.0*real(ibsize*ibsize+1,mps)*real(nblock,mps)/real(2*nnzero,mps)
7381000 FORMAT(' PBSBITS: ',a,f7.2,' %')
739 RETURN
740END SUBROUTINE pbsbits
741
751SUBROUTINE pblbits(npgrp,ibsize,nsparr,nsparc)
752 USE mpbits
753 USE mpdalc
754 IMPLICIT NONE
755
756 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
757 INTEGER(mpi), INTENT(IN) :: ibsize
758 INTEGER(mpl), DIMENSION(:), INTENT(IN) :: nsparr
759 INTEGER(mpl), DIMENSION(:), INTENT(OUT) :: nsparc
760
761 INTEGER(mpi) :: ichunk
762 INTEGER(mpi) :: i
763 INTEGER(mpi) :: ib
764 INTEGER(mpi) :: igrpf
765 INTEGER(mpi) :: igrpl
766 INTEGER(mpi) :: iproc
767 INTEGER(mpi) :: ioffb
768 INTEGER(mpi) :: ioffg
769 INTEGER(mpi) :: j
770 INTEGER(mpi) :: mb
771 INTEGER(mpi) :: mbt
772 INTEGER(mpi) :: mm
773 INTEGER(mpi) :: nd
774 INTEGER(mpi) :: ngrp
775 INTEGER(mpi) :: ir
776 INTEGER(mpi) :: irfrst
777 INTEGER(mpi) :: irlast
778 INTEGER(mpi) :: jb
779 INTEGER(mpi) :: jc
780 INTEGER(mpl) :: kk
781 INTEGER(mpl) :: length
782 INTEGER(mpl) :: ll
783 INTEGER(mpl) :: noffi
784 INTEGER(mpl), PARAMETER :: two=2
785 INTEGER(mpi), DIMENSION(:,:), ALLOCATABLE :: rowBlocksToGroups
786 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: blockCounter
787 INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: groupList
788 !$ INTEGER(mpi) :: OMP_GET_THREAD_NUM
789
790 LOGICAL :: btest
791
792 !$POMP INST BEGIN(pblbits)
793 nd=npgrp(n+1)-npgrp(1) ! number of diagonal elements
794 mb=(nd+nac-1)/ibsize+1 ! max. number of blocks per row/column
795 mbt=(nd-1)/ibsize+1 ! max. number of blocks in triangular part
796 length=int(mb,mpl)*int(nthrd,mpl)
797 CALL mpalloc(blockcounter,length,'PBBITS: block counter')
798 length=int(n,mpl)*int(nthrd,mpl)
799 CALL mpalloc(grouplist,length,'PBBITS: group list')
800
801 ! mapping row blocks to parameters groups
802 length=int(mbt,mpl)
803 CALL mpalloc(rowblockstogroups,two,length,'mapping row blocks to par. groups (I)')
804 rowblockstogroups(:,:)=0
805 igrpf=1 ! first group of block
806 igrpl=1 ! last group of block
807 ir=1 ! first row of block
808 DO i=1,mbt
809 DO WHILE (igrpf < n .AND. npgrp(igrpf+1) <= ir)
810 igrpf=igrpf+1
811 END DO
812 rowblockstogroups(1,i)=igrpf
813 ir=ir+ibsize
814 DO WHILE (igrpl < n .AND. npgrp(igrpl+1) < ir)
815 igrpl=igrpl+1
816 END DO
817 rowblockstogroups(2,i)=igrpl
818 END DO
819
820 ll=0
821 ichunk=min((mbt+nthrd-1)/nthrd/32+1,256)
822 ! analyze bit fields ('upper half' (j>=i))
823 ! parallelize row loop
824 !$OMP PARALLEL DO &
825 !$OMP PRIVATE(IPROC,IOFFB,IOFFG,I,NOFFI,KK,LL,MM,NGRP,J,IR,IRFRST,IRLAST) &
826 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
827 DO ib=1,mbt
828 irfrst=ibsize*(ib-1)+1 ! first row in block
829 irlast=ibsize*ib ! last row in block
830 iproc=0
831 !$ IPROC=OMP_GET_THREAD_NUM() ! thread number
832 ioffb=iproc*mb
833 ioffg=iproc*n
834 blockcounter(ioffb+1:ioffb+mb)=0
835 DO i=rowblockstogroups(1,ib),rowblockstogroups(2,ib)
836 noffi=int(i-1,mpl)*int(2*n+2-i,mpl)/2 ! row offset for I=J
837 ll=noffi/bs+i
838 mm=0
839 ngrp=0
840 DO j=i,n ! loop from diagonal element
841 IF (btest(bitfieldcounters(ll),mm)) THEN ! found group
842 ngrp=ngrp+1
843 grouplist(ioffg+ngrp)=j
844 END IF
845 mm=mm+1
846 IF (mm >= bs) THEN
847 ll=ll+1
848 mm=mm-bs
849 END IF
850 END DO
851 ! analyze rows (in overlap of group 'i 'and block 'ib')
852 DO ir=max(irfrst,npgrp(i)),min(irlast,npgrp(i+1)-1)
853 ! triangular part (parameter groups)
854 DO j=1,ngrp
855 ! column loop
856 DO jc=max(ir,npgrp(grouplist(ioffg+j))),npgrp(grouplist(ioffg+j)+1)-1
857 ! block number
858 jb=(jc-1)/ibsize+1
859 blockcounter(ioffb+jb)=blockcounter(ioffb+jb)+1
860 END DO
861 END DO
862 ! rectangular part (parameters, constraints)
863 noffi=ndimb+int(ir-1,mpl)*int(nac/bs+1,mpl)+1 ! row offset for J=1
864 ll=noffi ! row offset
865 mm=0
866 DO j=1,nac
867 IF (btest(bitfieldcounters(ll),mm)) THEN ! found group
868 ! block number
869 jb=(nd+j-1)/ibsize+1
870 blockcounter(ioffb+jb)=blockcounter(ioffb+jb)+1
871 END IF
872 mm=mm+1
873 IF (mm >= bs) THEN
874 ll=ll+1
875 mm=mm-bs
876 END IF
877 END DO
878 END DO
879 END DO
880 ! end of 'row' block
881 kk=nsparr(ib) ! offset for row block
882 DO j=1,mb
883 IF (blockcounter(ioffb+j) > 0) THEN
884 ! store used column block indices
885 nsparc(kk)=j
886 kk=kk+1
887 ENDIF
888 END DO
889 END DO
890 !$OMP END PARALLEL DO
891
892 ! 'empty' diagonal elements needed too
893 DO ib=mbt+1,mb
894 kk=nsparr(ib)
895 nsparc(kk)=ib
896 END DO
897
898 ! cleanup
899 CALL mpdealloc(grouplist)
900 CALL mpdealloc(blockcounter)
901 CALL mpdealloc(rowblockstogroups)
902
903 !$POMP INST END(pblbits)
904 WRITE(*,*) ' '
905 WRITE(*,*) 'PBLBITS: column list constructed ',nsparr(mb+1)-nsparr(1), ' words'
907 RETURN
908END SUBROUTINE pblbits
909
910
918SUBROUTINE prbits(npgrp,nsparr)
919 USE mpbits
920 USE mpdalc
921 IMPLICIT NONE
922
923 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
924 INTEGER(mpl), DIMENSION(:), INTENT(OUT) :: nsparr
925
926 INTEGER(mpi) :: ichunk
927 INTEGER(mpi) :: i
928 INTEGER(mpi) :: j
929 INTEGER(mpi) :: mm
930 INTEGER(mpi) :: nd
931 INTEGER(mpi) :: ir
932 INTEGER(mpl) :: ll
933 INTEGER(mpl) :: npar
934 INTEGER(mpl) :: nparc
935 INTEGER(mpl) :: ntot
936 INTEGER(mpl) :: noffi
937
938 LOGICAL :: btest
939
940 nd=npgrp(n+1)-npgrp(1) ! number of diagonal elements
941
942 ntot=0
943 ll=0
944 ichunk=min((n+nthrd-1)/nthrd/32+1,256)
945 ! analyze bit fields ('upper half' (j>=i))
946 ! parallelize row loop
947 ! private copy of NTOT for each thread, combined at end, init with 0.
948 !$OMP PARALLEL DO &
949 !$OMP PRIVATE(I,NOFFI,LL,MM,J,IR,NPAR,NPARC) &
950 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
951 DO i=1,n
952 noffi=int(i-1,mpl)*int(2*n+2-i,mpl)/2 ! row offset for I=J
953 ll=noffi/bs+i
954 mm=0
955 npar=0
956 ! triangular part (parameter groups)
957 DO j=i,n ! loop from diagonal element
958 IF (btest(bitfieldcounters(ll),mm)) npar=npar+npgrp(j+1)-npgrp(j) ! number of parameters
959 mm=mm+1
960 IF (mm >= bs) THEN
961 ll=ll+1
962 mm=mm-bs
963 END IF
964 END DO
965
966 ! save number of parameters for row(s)
967 DO ir=npgrp(i),npgrp(i+1)-1
968 ! rectangular part (parameters, constraints)
969 noffi=ndimb+int(ir-1,mpl)*int(nac/bs+1,mpl)+1 ! row offset for J=1
970 ll=noffi ! row offset
971 mm=0
972 nparc=0
973 DO j=1,nac
974 IF (btest(bitfieldcounters(ll),mm)) THEN ! found parameter/constraint combination
975 nparc=nparc+1
976 END IF
977 mm=mm+1
978 IF (mm >= bs) THEN
979 ll=ll+1
980 mm=mm-bs
981 END IF
982 END DO
983 nsparr(ir+1)=npar+nparc
984 npar=npar-1
985 END DO
986
987 END DO
988 !$OMP END PARALLEL DO
989
990 ! sum up, fill row offsets
991 nsparr(1)=1
992 DO i=1,nd
993 nsparr(i+1)=nsparr(i+1)+nsparr(i)
994 END DO
995 ! 'empty' diagonal elements needed too
996 DO i=nd+1,nd+nac
997 nsparr(i+1)=nsparr(i)+1
998 END DO
999 ntot=nsparr(nd+nac+1)-nsparr(1)
1000
1001 WRITE(*,*) ' '
1002 WRITE(*,*) 'PRBITS: number of diagonal elements',nd+nac
1003 WRITE(*,*) 'PRBITS: number of used elements',ntot
1004 WRITE(*,1000) 'fraction of used elements', 200.0*real(ntot,mps)/real(nd+nac,mps)/real(nd+nac+1,mps)
10051000 FORMAT(' PRBITS: ',a,f6.2,' %')
1006 RETURN
1007END SUBROUTINE prbits
1008
1017SUBROUTINE pcbits(npgrp,nsparr,nsparc)
1018 USE mpbits
1019 USE mpdalc
1020 IMPLICIT NONE
1021
1022 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
1023 INTEGER(mpl), DIMENSION(:), INTENT(IN) :: nsparr
1024 INTEGER(mpl), DIMENSION(:), INTENT(OUT) :: nsparc
1025
1026 INTEGER(mpi) :: ichunk
1027 INTEGER(mpi) :: i
1028 INTEGER(mpi) :: j
1029 INTEGER(mpi) :: mm
1030 INTEGER(mpi) :: nd
1031 INTEGER(mpi) :: ic
1032 INTEGER(mpi) :: ir
1033 INTEGER(mpl) :: kk
1034 INTEGER(mpl) :: ll
1035
1036 INTEGER(mpl) :: noffi
1037 INTEGER(mpl) :: noffr
1038
1039 LOGICAL :: btest
1040
1041 nd=npgrp(n+1)-npgrp(1) ! number of diagonal elements
1042
1043 ll=0
1044 ichunk=min((n+nthrd-1)/nthrd/32+1,256)
1045 ! analyze bit fields ('upper half' (j>=i))
1046 ! parallelize row loop
1047 ! private copy of NTOT for each thread, combined at end, init with 0.
1048 !$OMP PARALLEL DO &
1049 !$OMP PRIVATE(I,NOFFI,LL,MM,J,IR,KK,IC) &
1050 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
1051 DO i=1,n
1052 noffi=int(i-1,mpl)*int(2*n+2-i,mpl)/2 ! row offset for I=J
1053 ! fill column list for row(s)
1054 DO ir=npgrp(i),npgrp(i+1)-1
1055 ! triangular part (parameter groups)
1056 ll=noffi/bs+i
1057 mm=0
1058 kk=nsparr(ir)
1059 DO j=i,n ! loop from diagonal element
1060 IF (btest(bitfieldcounters(ll),mm)) THEN
1061 ! 'all' columns in group
1062 DO ic=max(ir,npgrp(j)),npgrp(j+1)-1
1063 nsparc(kk)=ic
1064 kk=kk+1
1065 END DO
1066 ENDIF
1067 mm=mm+1
1068 IF (mm >= bs) THEN
1069 ll=ll+1
1070 mm=mm-bs
1071 END IF
1072 END DO
1073 ! rectangular part (parameters, constraints)
1074 noffr=ndimb+int(ir-1,mpl)*int(nac/bs+1,mpl)+1 ! row offset for J=1
1075 ll=noffr ! row offset
1076 mm=0
1077 DO j=1,nac
1078 IF (btest(bitfieldcounters(ll),mm)) THEN ! found parameter/constraint combination
1079 nsparc(kk)=nd+j
1080 kk=kk+1
1081 END IF
1082 mm=mm+1
1083 IF (mm >= bs) THEN
1084 ll=ll+1
1085 mm=mm-bs
1086 END IF
1087 END DO
1088 END DO
1089
1090 END DO
1091 !$OMP END PARALLEL DO
1092
1093 ! 'empty' diagonal elements needed too
1094 DO ir=nd+1,nd+nac
1095 kk=nsparr(ir)
1096 nsparc(kk)=ir
1097 END DO
1098
1099 WRITE(*,*) ' '
1100 WRITE(*,*) 'PCBITS: column list constructed ',nsparr(nd+nac+1)-nsparr(1), ' words'
1102 RETURN
1103END SUBROUTINE pcbits
1104
1111SUBROUTINE ckbits(npgrp,ndims)
1112 USE mpbits
1113 IMPLICIT NONE
1114
1115 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
1116 INTEGER(mpl), DIMENSION(4), INTENT(OUT) :: ndims
1117
1118 INTEGER(mpi) :: nwcp(0:1)
1119 INTEGER(mpi) :: irgn(2)
1120 INTEGER(mpi) :: inr(2)
1121 INTEGER(mpl) :: ll
1122 INTEGER(mpl) :: noffi
1123 INTEGER(mpi) :: i
1124 INTEGER(mpi) :: j
1125 INTEGER(mpi) :: last
1126 INTEGER(mpi) :: lrgn
1127 INTEGER(mpi) :: next
1128 INTEGER(mpi) :: icp
1129 INTEGER(mpi) :: ib
1130 INTEGER(mpi) :: icount
1131 INTEGER(mpi) :: kbfw
1132 INTEGER(mpi) :: jp
1133 INTEGER(mpi) :: mm
1134 LOGICAL :: btest
1135
1136 DO i=1,4
1137 ndims(i)=0
1138 END DO
1139 kbfw=1
1140 IF (ibfw > 1) kbfw=2
1141 ll=0
1142
1143 DO i=1,n
1144 noffi=int(i-1,mpl)*int(i,mpl)*int(ibfw,mpl)/2
1145 ll=noffi/bs+i
1146 mm=0
1147 inr(1)=0
1148 inr(2)=0
1149 irgn(1)=1
1150 irgn(2)=1
1151 last=0
1152 lrgn=0
1153 DO j=1,i
1154 icount=0
1155 next=0
1156 DO ib=0,ibfw-1
1157 IF (btest(bitfieldcounters(ll),mm)) icount=ibset(icount,ib)
1158 mm=mm+1
1159 IF (mm >= bs) THEN
1160 ll=ll+1
1161 mm=mm-bs
1162 END IF
1163 END DO
1164
1165 IF (icount > 0) ndims(1)=ndims(1)+1
1166 ! keep pair ?
1167 IF (icount >= ireqpe) THEN
1168 next=1 ! double
1169 IF (icount <= isngpe) next=2 ! single
1170 inr(next)=inr(next)+npgrp(j+1)-npgrp(j) ! number of parameters
1171 IF (next /= last) THEN
1172 irgn(next)=irgn(next)+1
1173 END IF
1174 END IF
1175 last=next
1176 END DO
1177
1178 DO jp=1,kbfw
1179 IF (inr(jp) > 0) THEN
1180 icp=0
1181 nwcp(0)=inr(jp) ! list of column indices (default)
1182 nwcp(1)=irgn(jp)*2 ! list of regions (group starts and offsets)
1183 ! compress row ?
1184 IF ((nwcp(1) < nwcp(0)).OR.iextnd > 0) THEN
1185 icp=1
1186 END IF
1187 ! total space
1188 ndims(2) =ndims(2) +nwcp(icp)
1189 ndims(jp+2)=ndims(jp+2)+nwcp(0)*(npgrp(i+1)-npgrp(i))
1190 END IF
1191 END DO
1192
1193 END DO
1194
1195 RETURN
1196END SUBROUTINE ckbits
1197
1204SUBROUTINE spbits(npgrp,nsparr,nsparc) ! collect elements
1205 USE mpbits
1206 USE mpdalc
1207 IMPLICIT NONE
1208
1209 INTEGER(mpi), DIMENSION(:), INTENT(IN) :: npgrp
1210 INTEGER(mpl), DIMENSION(:,:), INTENT(IN) :: nsparr
1211 INTEGER(mpi), DIMENSION(:), INTENT(OUT) :: nsparc
1212
1213 INTEGER(mpl) :: kl
1214 INTEGER(mpl) :: l
1215 INTEGER(mpl) :: ll
1216 INTEGER(mpl) :: l1
1217 INTEGER(mpl) :: n1
1218 INTEGER(mpl) :: ndiff
1219 INTEGER(mpl) :: noffi
1220 INTEGER(mpl) :: noffj
1221 INTEGER(mpi) :: i
1222 INTEGER(mpi) :: j
1223 INTEGER(mpi) :: jb
1224 INTEGER(mpi) :: k
1225 INTEGER(mpi) :: m
1226 INTEGER(mpi) :: ichunk
1227 INTEGER(mpi) :: next
1228 INTEGER(mpi) :: last
1229
1230 LOGICAL :: btest
1231
1232 ichunk=min((n+nthrd-1)/nthrd/32+1,256)
1233 DO jb=0,nspc-1
1234 ! parallelize row loop
1235 !$OMP PARALLEL DO &
1236 !$OMP PRIVATE(I,N1,NOFFI,NOFFJ,L,M,KL,L1,NDIFF,LAST,LL,J,NEXT) &
1237 !$OMP SCHEDULE(DYNAMIC,ICHUNK)
1238 DO i=1,n
1239 n1=i+jb*(n+1)
1240 noffi=int(i-1,mpl)*int(i,mpl)*int(ibfw,mpl)/2
1241 l=noffi/bs+i
1242 m=jb
1243 kl=nsparr(1,n1) ! pointer to row in NSPARC
1244 l1=nsparr(2,n1) ! pointer to row in sparse matrix
1245 ndiff=(nsparr(1,n1+1)-kl)*(npgrp(i+1)-npgrp(i))-(nsparr(2,n1+1)-l1) ! 0 for no compression
1246 ll=l1
1247 last=0
1248
1249 DO j=1,i ! loop until diagonal element
1250 next=0
1251 IF(bitfieldcounters(l) /= 0) THEN
1252 IF(btest(bitfieldcounters(l),m)) THEN
1253 IF (ndiff == 0) THEN
1254 DO k=npgrp(j),npgrp(j+1)-1
1255 kl=kl+1
1256 nsparc(kl)=k ! column index
1257 END DO
1258 ELSE
1259 next=1
1260 IF (last == 0) THEN
1261 kl=kl+1
1262 nsparc(kl)=int(ll-l1,mpi)
1263 kl=kl+1
1264 nsparc(kl)=j
1265 END IF
1266 END IF
1267 ll=ll+(npgrp(j+1)-npgrp(j))
1268 END IF
1269 END IF
1270 last=next
1271 m=m+nspc
1272 IF (m >= bs) THEN
1273 m=m-bs
1274 l=l+1
1275 END IF
1276 END DO
1277
1278 ! extended storage ('2nd half' too) ?
1279 IF (iextnd > 0) THEN
1280 noffj=(i-1)*nspc
1281 m=int(mod(noffj,int(bs,mpl)),mpi)+jb
1282 last=0
1283 ! remaining columns
1284 DO j=i+1, n
1285 ! index for pair (J,I)
1286 noffi=int(j-1,mpl)*int(j,mpl)*int(ibfw,mpl)/2 ! for I=1
1287 l=noffi/bs+j+noffj/bs ! row offset + column offset
1288 next=0
1289 IF(btest(bitfieldcounters(l),m)) THEN
1290 IF (ndiff == 0) THEN
1291 DO k=npgrp(j),npgrp(j+1)-1
1292 kl=kl+1
1293 nsparc(kl)=k ! column index
1294 END DO
1295 ELSE
1296 next=1
1297 IF (last == 0) THEN
1298 kl=kl+1
1299 nsparc(kl)=int(ll-l1,mpi)
1300 kl=kl+1
1301 nsparc(kl)=j
1302 END IF
1303 END IF
1304 ll=ll+(npgrp(j+1)-npgrp(j))
1305 END IF
1306 last=next
1307
1308 END DO
1309 END IF
1310 ! next offset, end marker
1311 IF (ndiff /= 0) THEN
1312 kl=kl+1
1313 nsparc(kl)=int(ll-l1,mpi)
1314 kl=kl+1
1315 nsparc(kl)=n+1
1316 END IF
1317
1318 END DO
1319 !$OMP END PARALLEL DO
1320 END DO
1321
1322 n1=(n+1)*nspc
1323 WRITE(*,*) ' '
1324 WRITE(*,*) 'SPBITS: sparse structure constructed ',nsparr(1,n1), ' words'
1325 WRITE(*,*) 'SPBITS: dimension parameter of matrix',nsparr(2,1)
1326 IF (ibfw <= 1) THEN
1327 WRITE(*,*) 'SPBITS: index of last used location',nsparr(2,n1)
1328 ELSE
1329 WRITE(*,*) 'SPBITS: index of last used double',nsparr(2,n1/2)
1330 WRITE(*,*) 'SPBITS: index of last used single',nsparr(2,n1)
1331 END IF
1333 RETURN
1334END SUBROUTINE spbits
1335
1336
1340!
1341SUBROUTINE clbmap(in)
1342 USE mpbits
1343 USE mpdalc
1344 IMPLICIT NONE
1345
1346 INTEGER(mpi), INTENT(IN) :: in
1347
1348 INTEGER(mpl) :: noffd
1349 INTEGER(mpi) :: mb
1350
1351 ! save input parameter
1352 n2=in
1353 ! bit field array size
1354 noffd=int(n2,mpl)*int(n2-1,mpl)/2
1355 ndimb2=noffd/bs+n2
1356 mb=int(4.0e-6*real(ndimb2,mps),mpi)
1357 WRITE(*,*) ' '
1358 IF (mb > 0) THEN
1359 WRITE(*,*) 'CLBMAP: dimension of bit-map',ndimb2 , '(',mb,'MB)'
1360 ELSE
1361 WRITE(*,*) 'CLBMAP: dimension of bit-map',ndimb2 , '(< 1 MB)'
1362 END IF
1363 CALL mpalloc(bitmap,ndimb2,'INBMAP: bit storage')
1364 bitmap=0
1365 RETURN
1366END SUBROUTINE clbmap
1367
1373SUBROUTINE inbmap(im,jm) ! include element (I,J)
1374 USE mpbits
1375 IMPLICIT NONE
1376
1377 INTEGER(mpi), INTENT(IN) :: im
1378 INTEGER(mpi), INTENT(IN) :: jm
1379
1380 INTEGER(mpl) :: l
1381 INTEGER(mpi) :: i
1382 INTEGER(mpi) :: j
1383 INTEGER(mpi) :: noffj
1384 INTEGER(mpl) :: noffi
1385 INTEGER(mpi) :: m
1386
1387 IF(im == jm) RETURN ! diagonal
1388 j=min(im,jm)
1389 i=max(im,jm)
1390 IF(j <= 0) RETURN ! out low
1391 IF(i > n2) RETURN ! out high
1392 noffi=int(i-1,mpl)*int(i-2,mpl)/2 ! for J=1
1393 noffj=(j-1)
1394 l=noffi/bs+i+noffj/bs ! row offset + column offset
1395 ! add I instead of 1 to keep bit maps of different rows in different words (openMP !)
1396 m=mod(noffj,bs)
1397 bitmap(l)=ibset(bitmap(l),m)
1398 RETURN
1399END SUBROUTINE inbmap
1400
1407SUBROUTINE gpbmap(ngroup,npgrp,npair)
1408 USE mpbits
1409 IMPLICIT NONE
1410
1411 INTEGER(mpi), INTENT(IN) :: ngroup
1412 INTEGER(mpi), DIMENSION(:,:), INTENT(IN) :: npgrp
1413 INTEGER(mpi), DIMENSION(:), INTENT(OUT) :: npair
1414
1415 INTEGER(mpl) :: l
1416 INTEGER(mpl) :: noffi
1417 INTEGER(mpi) :: i
1418 INTEGER(mpi) :: j
1419 INTEGER(mpi) :: m
1420 LOGICAL :: btest
1421
1422 npair(1:ngroup)=0
1423 l=0
1424
1425 DO i=1,ngroup
1426 npair(i)=npair(i)+npgrp(2,i)-1 ! from own group
1427 noffi=int(i-1,mpl)*int(i-2,mpl)/2
1428 l=noffi/bs+i
1429 m=0
1430 DO j=1,i-1
1431 IF (btest(bitmap(l),m)) THEN
1432 ! from other group
1433 npair(i)=npair(i)+npgrp(2,j)
1434 npair(j)=npair(j)+npgrp(2,i)
1435 END IF
1436 m=m+1
1437 IF (m >= bs) THEN
1438 l=l+1
1439 m=m-bs
1440 END IF
1441 END DO
1442 END DO
1443
1444 RETURN
1445END SUBROUTINE gpbmap
1446
1453SUBROUTINE ggbmap(ipgrp,npair,npgrp)
1454 USE mpbits
1455 IMPLICIT NONE
1456
1457 INTEGER(mpi), INTENT(IN) :: ipgrp
1458 INTEGER(mpi), INTENT(OUT) :: npair
1459 INTEGER(mpi), DIMENSION(:), INTENT(OUT) :: npgrp
1460
1461 INTEGER(mpl) :: l
1462 INTEGER(mpl) :: noffi
1463 INTEGER(mpi) :: noffj
1464 INTEGER(mpi) :: i
1465 INTEGER(mpi) :: j
1466 LOGICAL :: btest
1467
1468 npair=0
1469
1470 i=ipgrp
1471 noffi=int(i-1,mpl)*int(i-2,mpl)/2 ! for J=1
1472 l=noffi/bs+i! row offset
1473 ! add I instead of 1 to keep bit maps of different rows in different words (openMP !)
1474 DO j=1,ipgrp-1
1475 noffj=j-1
1476 IF (btest(bitmap(l+noffj/bs),mod(noffj,bs))) THEN
1477 npair=npair+1
1478 npgrp(npair)=j
1479 END IF
1480 END DO
1481
1482 noffj=ipgrp-1
1483 DO i=ipgrp+1,n2
1484 noffi=int(i-1,mpl)*int(i-2,mpl)/2 ! for J=1
1485 l=noffi/bs+i ! row offset
1486 IF (btest(bitmap(l+noffj/bs),mod(noffj,bs))) THEN
1487 npair=npair+1
1488 npgrp(npair)=i
1489 END IF
1490 END DO
1491
1492 RETURN
1493END SUBROUTINE ggbmap
allocate array
Definition: mpdalc.f90:36
deallocate array
Definition: mpdalc.f90:42
subroutine pcbits(npgrp, nsparr, nsparc)
Analyze bit fields.
Definition: mpbits.f90:1018
subroutine ndbits(npgrp, ndims, nsparr, ihst)
Analyze bit fields.
Definition: mpbits.f90:302
subroutine clbits(in, jreqpe, jhispe, jsngpe, jextnd, idimb, ispc)
Calculate bit (field) array size, encoding.
Definition: mpbits.f90:179
subroutine plbits(in, inar, inac, idimb)
Calculate bit field array size (PARDISO).
Definition: mpbits.f90:252
subroutine spbits(npgrp, nsparr, nsparc)
Create sparsity information.
Definition: mpbits.f90:1205
subroutine irbits(i, j)
Fill bit fields (counters, rectangular part).
Definition: mpbits.f90:146
subroutine clbmap(in)
Clear (additional) bit map.
Definition: mpbits.f90:1342
subroutine inbmap(im, jm)
Fill bit map.
Definition: mpbits.f90:1374
subroutine ckbits(npgrp, ndims)
Check sparsity of matrix.
Definition: mpbits.f90:1112
subroutine ggbmap(ipgrp, npair, npgrp)
Get paired (parameter) groups from map.
Definition: mpbits.f90:1454
subroutine prbits(npgrp, nsparr)
Analyze bit fields.
Definition: mpbits.f90:919
subroutine gpbmap(ngroup, npgrp, npair)
Get pairs (statistic) from map.
Definition: mpbits.f90:1408
subroutine pblbits(npgrp, ibsize, nsparr, nsparc)
Analyze bit fields.
Definition: mpbits.f90:752
subroutine pbsbits(npgrp, ibsize, nnzero, nblock, nbkrow)
Analyze bit fields.
Definition: mpbits.f90:575
subroutine inbits(im, jm, inc)
Fill bit fields (counters, triangular part).
Definition: mpbits.f90:70
subroutine hmpent(ih, x)
entry flt.pt.
Definition: mphistab.f90:183
Bit field data.
Definition: mpbits.f90:40
integer(mpi) n2
matrix size (map)
Definition: mpbits.f90:49
integer(mpi) ireqpe
min number of pair entries
Definition: mpbits.f90:51
integer(mpl) ndimb2
dimension for bit map
Definition: mpbits.f90:45
integer(mpi) ibfw
bit field width
Definition: mpbits.f90:50
integer(mpi) iextnd
flag for extended storage (both 'halves' of sym.
Definition: mpbits.f90:53
integer(mpi) n
matrix size (counters, sparse, triangular part)
Definition: mpbits.f90:46
integer(mpi), parameter bs
number of bits in INTEGER(mpi)
Definition: mpbits.f90:59
integer(mpi) nspc
number of precision for sparse global matrix (1=D, 2=D+f)
Definition: mpbits.f90:54
integer(mpi) nar
additional rows (counters, sparse, rectangular part)
Definition: mpbits.f90:47
integer(mpi) isngpe
upper bound for pair entry single precision storage
Definition: mpbits.f90:52
integer(mpi), dimension(:), allocatable bitmap
fit field map for global parameters pairs (measurements)
Definition: mpbits.f90:58
integer(mpi) mxcnt
max value for bit field counters
Definition: mpbits.f90:55
integer(mpi) nac
additional columns (counters, sparse, rectangular part)
Definition: mpbits.f90:48
integer(mpi) nthrd
number of threads
Definition: mpbits.f90:56
integer(mpl) ndimb
dimension for bit (field) array
Definition: mpbits.f90:44
integer(mpi), dimension(:), allocatable bitfieldcounters
fit field counters for global parameters pairs (tracks)
Definition: mpbits.f90:57
(De)Allocate vectors and arrays.
Definition: mpdalc.f90:24
Definition of constants.
Definition: mpdef.f90:24