make_threads Subroutine

private subroutine make_threads(dist, row_sizes)

Creates a distribution for threads

Presence of row_sizes When row_sizes is present then the thread distribution attempts to distribute rows to threads such that the sum of delegated row sizes is approximately matched for all rows. When row_sizes is not present then a random distribution is chosen.

Arguments

Type IntentOptional Attributes Name
type(dbcsr_distribution_obj), POINTER :: dist

Add thread distribution to this distribution

integer, intent(in), optional, DIMENSION(:) :: row_sizes

row block sizes


Source Code

   SUBROUTINE make_threads(dist, row_sizes)
      !! Creates a distribution for threads
      !!
      !! Presence of row_sizes
      !! When row_sizes is present then the thread distribution
      !! attempts to distribute rows to threads such that the sum of
      !! delegated row sizes is approximately matched for all rows.
      !! When row_sizes is not present then a random distribution is chosen.

      TYPE(dbcsr_distribution_obj), POINTER              :: dist
         !! Add thread distribution to this distribution
      INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL        :: row_sizes
         !! row block sizes

      INTEGER                                            :: block_size, block_size0, cur_block, &
                                                            group_size, i, last_row, nlrows, &
                                                            nrows, nthreads, row, t, t_cnt
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: reorder, sorted_row_sizes
      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS         :: lrows, td
      LOGICAL                                            :: assigned, found
      REAL(kind=sp)                                      :: load_fraction, rn, soft_thr
      TYPE(dbcsr_heap_type)                              :: t_heap

!   ---------------------------------------------------------------------------

      nthreads = 1
!$    nthreads = OMP_GET_NUM_THREADS()
!$    IF (dist%d%num_threads /= nthreads) &
!$       DBCSR_ABORT("Thread number has changed")
      nrows = dbcsr_distribution_nrows(dist)
      nlrows = dbcsr_distribution_nlocal_rows(dist)
      lrows => dbcsr_distribution_local_rows(dist)

!$OMP     BARRIER
!$OMP     MASTER

      load_fraction = REAL(dbcsr_cfg%comm_thread_load%val)/100.0
      IF (nthreads == 1) load_fraction = 1.0

      IF (.NOT. dist%d%has_thread_dist) THEN
         dist%d%num_threads = nthreads
         group_size = 0; cur_block = 0

         ALLOCATE (td(nrows))
         dist%d%has_thread_dist = .TRUE.
         CALL array_new(dist%d%thread_dist, td, gift=.TRUE.)
         td => array_data(dist%d%thread_dist)

         IF (PRESENT(row_sizes)) THEN
            ! The goal is to distribute rows to threads as equally as
            ! possible. The row sizes are first sorted. Each group of
            ! equally sized rows (group_size rows of size cur_block) is
            ! distributed to threads (keeping consecutive rows
            ! together). The group is divided into equally-sized blocks
            ! (block_size0, block_size).  Leftover rows (those that can
            ! not be equally distributed to threads) are then assigned
            ! to threads so that each thread's cumulative load attempts
            ! to be equal. This distribution is achieved using a heap.
            !
            ! The heap is used to distribute "leftover"rows to threads.
            ! Leftover rows are those of the same size that can not be
            ! evenly distributed among all threads.
            CALL dbcsr_heap_new(t_heap, nthreads - 1)
            ! We do not want thread 0 to be in the heap.
            CALL dbcsr_heap_fill(t_heap, (/(0, i=1, nthreads - 1)/))
            ALLOCATE (sorted_row_sizes(nrows))
            ALLOCATE (reorder(nrows))
            sorted_row_sizes(:) = row_sizes(:)
            CALL sort(sorted_row_sizes, nrows, reorder)

            row = 1
            DO WHILE (row .LE. nrows)
               cur_block = sorted_row_sizes(nrows - row + 1)
               assigned = .FALSE.
               group_size = 0

               last_row = nrows - row + 1
               DO i = last_row, 1, -1
                  IF (cur_block == sorted_row_sizes(i)) THEN
                     group_size = group_size + 1
                     row = row + 1
                  ELSE
                     EXIT
                  END IF
               END DO

               soft_thr = load_fraction + nthreads - 1
               block_size0 = INT(load_fraction*(group_size/soft_thr))
               block_size = INT(group_size/soft_thr)

               !blocks for master thread
               IF (block_size0 > 0) &
                  td(reorder(last_row:last_row - block_size0 + 1:-1)) = 0

               !Other threads
               IF (block_size > 0) THEN
                  DO t = 1, nthreads - 1
                     td(reorder(last_row - block_size0 - (t - 1)*block_size: &
                                last_row - block_size0 - (t)*block_size + 1:-1)) = t
                  END DO
               END IF

               !Leftover bocks
               DO i = last_row - block_size0 - (nthreads - 1)*block_size, last_row + 1 - group_size, -1
                  CALL dbcsr_heap_get_first(t_heap, t, t_cnt, found)
                  t_cnt = t_cnt + cur_block
                  CALL dbcsr_heap_reset_first(t_heap, t_cnt)
                  td(reorder(i)) = t
               END DO

            END DO
            CALL dbcsr_heap_release(t_heap)
            DEALLOCATE (sorted_row_sizes)
            DEALLOCATE (reorder)
         ELSE
            DO t = 1, nrows
               IF (.FALSE.) THEN
                  td(t) = MOD(t - 1, nthreads)
               ELSE
                  CALL RANDOM_NUMBER(rn)
                  ! Makes sure the numbers are in the proper integer range.
                  td(t) = MOD(INT(rn*REAL(nthreads)), nthreads)
               END IF
            END DO
         END IF
      END IF
!$OMP     END MASTER
   END SUBROUTINE make_threads