dbcsr_redistribute Subroutine

public subroutine dbcsr_redistribute(matrix, redist)

Redistributes a DBCSR matrix. The new distribution should have compatible row and column blocks.

this should be changed to be like the make images (i.e., copy data in finalize, not here & now)

Arguments

TypeIntentOptionalAttributesName
type(dbcsr_type), intent(in) :: matrix

matrix to redistribute

type(dbcsr_type), intent(inout) :: redist

redistributed matrix, which should already be created


Contents

Source Code


Source Code

   SUBROUTINE dbcsr_redistribute(matrix, redist)
      !! Redistributes a DBCSR matrix.
      !! The new distribution should have compatible row and column blocks.

      TYPE(dbcsr_type), INTENT(IN)                       :: matrix
         !! matrix to redistribute
      TYPE(dbcsr_type), INTENT(INOUT)                    :: redist
         !! redistributed matrix, which should already be created

      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_redistribute'
      INTEGER, PARAMETER                                 :: metalen = 2
      LOGICAL, PARAMETER                                 :: dbg = .FALSE.

      INTEGER :: blk, blk_ps, blks, col, col_size, data_type, dst_p, handle, meta_l, mp_group, &
                 numnodes, nze, row, row_size, src_p, stored_col_new, stored_row_new
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: rd_disp, recv_meta, rm_disp, sd_disp, &
                                                            sdp, send_meta, sm_disp, smp
      INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: recv_count, send_count, &
                                                            total_recv_count, total_send_count
      INTEGER, DIMENSION(:), POINTER                     :: col_blk_size_new, col_dist_new, &
                                                            row_blk_size_new, row_dist_new
      INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
      LOGICAL                                            :: sym_tr, tr
      TYPE(dbcsr_data_obj)                               :: data_block, recv_data, send_data
      TYPE(dbcsr_distribution_obj)                       :: dist_new
      TYPE(dbcsr_iterator)                               :: iter
      TYPE(dbcsr_mp_obj)                                 :: mp_obj_new

!   ---------------------------------------------------------------------------

      CALL timeset(routineN, handle)
      !call dbcsr_print_dist (matrix%dist)
      !call dbcsr_print_dist (redist%dist)
      IF (.NOT. dbcsr_valid_index(matrix)) &
         DBCSR_ABORT("Input not valid.")
      IF (matrix%replication_type .NE. dbcsr_repl_none) &
         DBCSR_WARN("Can not redistribute replicated matrix.")
      data_type = matrix%data_type
      ! Get row and column start and end positions
      ! Old matrix
      ! New matrix
      dist_new = dbcsr_distribution(redist)
      row_blk_size_new => array_data(redist%row_blk_size)
      col_blk_size_new => array_data(redist%col_blk_size)
      row_dist_new => dbcsr_distribution_row_dist(dist_new)
      col_dist_new => dbcsr_distribution_col_dist(dist_new)
      !
      mp_obj_new = dbcsr_distribution_mp(dist_new)
      pgrid => dbcsr_mp_pgrid(mp_obj_new)
      numnodes = dbcsr_mp_numnodes(mp_obj_new)
      mp_group = dbcsr_mp_group(mp_obj_new)
      !
      IF (MAXVAL(row_dist_new) .GT. UBOUND(pgrid, 1)) &
         DBCSR_ABORT('Row distribution references unexistent processor rows')
      IF (dbg) THEN
         IF (MAXVAL(row_dist_new) .NE. UBOUND(pgrid, 1)) &
            DBCSR_WARN('Range of row distribution not equal to processor rows')
      END IF
      IF (MAXVAL(col_dist_new) .GT. UBOUND(pgrid, 2)) &
         DBCSR_ABORT('Col distribution references unexistent processor cols')
      IF (dbg) THEN
         IF (MAXVAL(col_dist_new) .NE. UBOUND(pgrid, 2)) &
            DBCSR_WARN('Range of col distribution not equal to processor cols')
      END IF
      ALLOCATE (send_count(2, 0:numnodes - 1))
      ALLOCATE (recv_count(2, 0:numnodes - 1))
      ALLOCATE (total_send_count(2, 0:numnodes - 1))
      ALLOCATE (total_recv_count(2, 0:numnodes - 1))
      ALLOCATE (sdp(0:numnodes - 1))
      ALLOCATE (sd_disp(0:numnodes - 1))
      ALLOCATE (smp(0:numnodes - 1))
      ALLOCATE (sm_disp(0:numnodes - 1))
      ALLOCATE (rd_disp(0:numnodes - 1))
      ALLOCATE (rm_disp(0:numnodes - 1))
      ! Count initial sizes for sending.
      !
      send_count(:, :) = 0
      CALL dbcsr_iterator_start(iter, matrix)
      dst_p = -1
      DO WHILE (dbcsr_iterator_blocks_left(iter))
         CALL dbcsr_iterator_next_block(iter, row, col, blk, tr, &
                                        row_size=row_size, col_size=col_size)
         sym_tr = .FALSE.
         CALL dbcsr_get_stored_coordinates(redist, &
                                           row, col, dst_p)
         nze = row_size*col_size
         send_count(1, dst_p) = send_count(1, dst_p) + 1
         send_count(2, dst_p) = send_count(2, dst_p) + nze
      END DO
      CALL dbcsr_iterator_stop(iter)
      CALL mp_alltoall(send_count, recv_count, 2, mp_group)
      ! Allocate data structures needed for data exchange.
      CALL dbcsr_data_init(recv_data)
      CALL dbcsr_data_new(recv_data, data_type, SUM(recv_count(2, :)))
      ALLOCATE (recv_meta(metalen*SUM(recv_count(1, :))))
      CALL dbcsr_data_init(send_data)
      CALL dbcsr_data_new(send_data, data_type, SUM(send_count(2, :)))
      ALLOCATE (send_meta(metalen*SUM(send_count(1, :))))
      ! Fill in the meta data structures and copy the data.
      DO dst_p = 0, numnodes - 1
         total_send_count(1, dst_p) = send_count(1, dst_p)
         total_send_count(2, dst_p) = send_count(2, dst_p)
         total_recv_count(1, dst_p) = recv_count(1, dst_p)
         total_recv_count(2, dst_p) = recv_count(2, dst_p)
      END DO
      sd_disp = -1; sm_disp = -1
      rd_disp = -1; rm_disp = -1
      sd_disp(0) = 1; sm_disp(0) = 1
      rd_disp(0) = 1; rm_disp(0) = 1
      DO dst_p = 1, numnodes - 1
         sm_disp(dst_p) = sm_disp(dst_p - 1) &
                          + metalen*total_send_count(1, dst_p - 1)
         sd_disp(dst_p) = sd_disp(dst_p - 1) &
                          + total_send_count(2, dst_p - 1)
         rm_disp(dst_p) = rm_disp(dst_p - 1) &
                          + metalen*total_recv_count(1, dst_p - 1)
         rd_disp(dst_p) = rd_disp(dst_p - 1) &
                          + total_recv_count(2, dst_p - 1)
      END DO
      sdp(:) = sd_disp     ! sdp points to the the next place to store
      ! data. It is postincremented.
      smp(:) = sm_disp - metalen  ! But smp points to the "working" data, not
      ! the next. It is pre-incremented, so we must
      ! first rewind it.
      CALL dbcsr_data_init(data_block)
      CALL dbcsr_data_new(data_block, data_type)
      CALL dbcsr_iterator_start(iter, matrix)
      dst_p = -1
      DO WHILE (dbcsr_iterator_blocks_left(iter))
         CALL dbcsr_iterator_next_block(iter, row, col, data_block, tr, blk)
         !IF (tr) WRITE(*,*)"block at",row,col," is transposed"
         sym_tr = .FALSE.
         CALL dbcsr_get_stored_coordinates(redist, &
                                           row, col, dst_p)
         smp(dst_p) = smp(dst_p) + metalen
         IF (tr) THEN
            send_meta(smp(dst_p)) = -row
         ELSE
            send_meta(smp(dst_p)) = row
         END IF
         send_meta(smp(dst_p) + 1) = col ! new blocked column
         nze = dbcsr_data_get_size(data_block)
         CALL dbcsr_data_set(send_data, lb=sdp(dst_p), data_size=nze, &
                             src=data_block, source_lb=1)
         !send_data(sdp(dst_p):sdp(dst_p)+SIZE(r_dp)-1) &
         !     = r_dp(:)
         sdp(dst_p) = sdp(dst_p) + nze
      END DO
      CALL dbcsr_iterator_stop(iter)
      CALL dbcsr_data_clear_pointer(data_block)
      ! Exchange the data and metadata structures.
      SELECT CASE (data_type)
      CASE (dbcsr_type_real_4)
         CALL hybrid_alltoall_s1( &
            send_data%d%r_sp(:), total_send_count(2, :), sd_disp(:) - 1, &
            recv_data%d%r_sp(:), total_recv_count(2, :), rd_disp(:) - 1, &
            mp_obj_new)
      CASE (dbcsr_type_real_8)
         !CALL mp_alltoall(&
         !     send_data%d%r_dp(:), total_send_count(2,:), sd_disp(:)-1,&
         !     recv_data%d%r_dp(:), total_recv_count(2,:), rd_disp(:)-1,&
         !     mp_group)
         CALL hybrid_alltoall_d1( &
            send_data%d%r_dp(:), total_send_count(2, :), sd_disp(:) - 1, &
            recv_data%d%r_dp(:), total_recv_count(2, :), rd_disp(:) - 1, &
            mp_obj_new)
      CASE (dbcsr_type_complex_4)
         CALL hybrid_alltoall_c1( &
            send_data%d%c_sp(:), total_send_count(2, :), sd_disp(:) - 1, &
            recv_data%d%c_sp(:), total_recv_count(2, :), rd_disp(:) - 1, &
            mp_obj_new)
      CASE (dbcsr_type_complex_8)
         CALL hybrid_alltoall_z1( &
            send_data%d%c_dp(:), total_send_count(2, :), sd_disp(:) - 1, &
            recv_data%d%c_dp(:), total_recv_count(2, :), rd_disp(:) - 1, &
            mp_obj_new)
      END SELECT
      !CALL mp_alltoall(send_data(:), total_send_count(2,:), sd_disp(:)-1,&
      !     recv_data(:), total_recv_count(2,:), rd_disp(:)-1, mp_group)
      CALL hybrid_alltoall_i1(send_meta(:), metalen*total_send_count(1, :), sm_disp(:) - 1, &
                              recv_meta(:), metalen*total_recv_count(1, :), rm_disp(:) - 1, mp_obj_new)
      ! Now fill in the data.
      CALL dbcsr_work_create(redist, &
                             SUM(recv_count(1, :)), &
                             SUM(recv_count(2, :)), work_mutable=.FALSE., n=1)
      !
      blk_ps = 1
      blks = 0
      DO src_p = 0, numnodes - 1
         !data_offset_l = rd_disp(src_p)
         DO meta_l = 1, recv_count(1, src_p)
            row = recv_meta(rm_disp(src_p) + metalen*(meta_l - 1))
            tr = row .LT. 0
            stored_row_new = ABS(row)
            stored_col_new = recv_meta(rm_disp(src_p) + metalen*(meta_l - 1) + 1)
            nze = row_blk_size_new(stored_row_new)*col_blk_size_new(stored_col_new)
            !r_dp => recv_data(blk_ps:blk_ps+nze-1)
            !CALL dbcsr_put_block(redist, stored_row_new, stored_col_new, r_dp, tr)
            !### this should be changed to be like the make images (i.e., copy data in finalize, not here & now)
            data_block = pointer_view(data_block, recv_data, blk_ps, nze)
            CALL dbcsr_put_block(redist, stored_row_new, stored_col_new, data_block, transposed=tr)
            blk_ps = blk_ps + nze
            blks = blks + 1
         END DO
      END DO
      CALL dbcsr_data_clear_pointer(data_block)
      CALL dbcsr_data_release(data_block)
      !
      IF (dbg) THEN
         WRITE (*, *) routineN//" Declared blocks=", redist%wms(1)%lastblk, &
            "actual=", blks
         WRITE (*, *) routineN//" Declared data size=", redist%wms(1)%datasize, &
            "actual=", blk_ps
      END IF
      CALL dbcsr_finalize(redist)
      CALL dbcsr_data_release(recv_data)
      CALL dbcsr_data_release(send_data)
      DEALLOCATE (send_count)
      DEALLOCATE (recv_count)
      DEALLOCATE (sdp); DEALLOCATE (sd_disp)
      DEALLOCATE (smp); DEALLOCATE (sm_disp)
      DEALLOCATE (rd_disp)
      DEALLOCATE (rm_disp)
      DEALLOCATE (recv_meta)
      DEALLOCATE (send_meta)
      CALL timestop(handle)
   END SUBROUTINE dbcsr_redistribute