Redistributes a DBCSR matrix. The new distribution should have compatible row and column blocks.
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(dbcsr_type), | intent(in) | :: | matrix |
matrix to redistribute |
||
type(dbcsr_type), | intent(inout) | :: | redist |
redistributed matrix, which should already be created |
SUBROUTINE dbcsr_redistribute(matrix, redist)
!! Redistributes a DBCSR matrix.
!! The new distribution should have compatible row and column blocks.
TYPE(dbcsr_type), INTENT(IN) :: matrix
!! matrix to redistribute
TYPE(dbcsr_type), INTENT(INOUT) :: redist
!! redistributed matrix, which should already be created
CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_redistribute'
INTEGER, PARAMETER :: metalen = 2
LOGICAL, PARAMETER :: dbg = .FALSE.
INTEGER :: blk, blk_ps, blks, col, col_size, data_type, dst_p, handle, meta_l, mp_group, &
numnodes, nze, row, row_size, src_p, stored_col_new, stored_row_new
INTEGER, ALLOCATABLE, DIMENSION(:) :: rd_disp, recv_meta, rm_disp, sd_disp, &
sdp, send_meta, sm_disp, smp
INTEGER, ALLOCATABLE, DIMENSION(:, :) :: recv_count, send_count, &
total_recv_count, total_send_count
INTEGER, DIMENSION(:), POINTER :: col_blk_size_new, col_dist_new, &
row_blk_size_new, row_dist_new
INTEGER, DIMENSION(:, :), POINTER :: pgrid
LOGICAL :: sym_tr, tr
TYPE(dbcsr_data_obj) :: data_block, recv_data, send_data
TYPE(dbcsr_distribution_obj) :: dist_new
TYPE(dbcsr_iterator) :: iter
TYPE(dbcsr_mp_obj) :: mp_obj_new
! ---------------------------------------------------------------------------
CALL timeset(routineN, handle)
!call dbcsr_print_dist (matrix%dist)
!call dbcsr_print_dist (redist%dist)
IF (.NOT. dbcsr_valid_index(matrix)) &
DBCSR_ABORT("Input not valid.")
IF (matrix%replication_type .NE. dbcsr_repl_none) &
DBCSR_WARN("Can not redistribute replicated matrix.")
data_type = matrix%data_type
! Get row and column start and end positions
! Old matrix
! New matrix
dist_new = dbcsr_distribution(redist)
row_blk_size_new => array_data(redist%row_blk_size)
col_blk_size_new => array_data(redist%col_blk_size)
row_dist_new => dbcsr_distribution_row_dist(dist_new)
col_dist_new => dbcsr_distribution_col_dist(dist_new)
!
mp_obj_new = dbcsr_distribution_mp(dist_new)
pgrid => dbcsr_mp_pgrid(mp_obj_new)
numnodes = dbcsr_mp_numnodes(mp_obj_new)
mp_group = dbcsr_mp_group(mp_obj_new)
!
IF (MAXVAL(row_dist_new) .GT. UBOUND(pgrid, 1)) &
DBCSR_ABORT('Row distribution references unexistent processor rows')
IF (dbg) THEN
IF (MAXVAL(row_dist_new) .NE. UBOUND(pgrid, 1)) &
DBCSR_WARN('Range of row distribution not equal to processor rows')
END IF
IF (MAXVAL(col_dist_new) .GT. UBOUND(pgrid, 2)) &
DBCSR_ABORT('Col distribution references unexistent processor cols')
IF (dbg) THEN
IF (MAXVAL(col_dist_new) .NE. UBOUND(pgrid, 2)) &
DBCSR_WARN('Range of col distribution not equal to processor cols')
END IF
ALLOCATE (send_count(2, 0:numnodes - 1))
ALLOCATE (recv_count(2, 0:numnodes - 1))
ALLOCATE (total_send_count(2, 0:numnodes - 1))
ALLOCATE (total_recv_count(2, 0:numnodes - 1))
ALLOCATE (sdp(0:numnodes - 1))
ALLOCATE (sd_disp(0:numnodes - 1))
ALLOCATE (smp(0:numnodes - 1))
ALLOCATE (sm_disp(0:numnodes - 1))
ALLOCATE (rd_disp(0:numnodes - 1))
ALLOCATE (rm_disp(0:numnodes - 1))
! Count initial sizes for sending.
!
send_count(:, :) = 0
CALL dbcsr_iterator_start(iter, matrix)
dst_p = -1
DO WHILE (dbcsr_iterator_blocks_left(iter))
CALL dbcsr_iterator_next_block(iter, row, col, blk, tr, &
row_size=row_size, col_size=col_size)
sym_tr = .FALSE.
CALL dbcsr_get_stored_coordinates(redist, &
row, col, dst_p)
nze = row_size*col_size
send_count(1, dst_p) = send_count(1, dst_p) + 1
send_count(2, dst_p) = send_count(2, dst_p) + nze
END DO
CALL dbcsr_iterator_stop(iter)
CALL mp_alltoall(send_count, recv_count, 2, mp_group)
! Allocate data structures needed for data exchange.
CALL dbcsr_data_init(recv_data)
CALL dbcsr_data_new(recv_data, data_type, SUM(recv_count(2, :)))
ALLOCATE (recv_meta(metalen*SUM(recv_count(1, :))))
CALL dbcsr_data_init(send_data)
CALL dbcsr_data_new(send_data, data_type, SUM(send_count(2, :)))
ALLOCATE (send_meta(metalen*SUM(send_count(1, :))))
! Fill in the meta data structures and copy the data.
DO dst_p = 0, numnodes - 1
total_send_count(1, dst_p) = send_count(1, dst_p)
total_send_count(2, dst_p) = send_count(2, dst_p)
total_recv_count(1, dst_p) = recv_count(1, dst_p)
total_recv_count(2, dst_p) = recv_count(2, dst_p)
END DO
sd_disp = -1; sm_disp = -1
rd_disp = -1; rm_disp = -1
sd_disp(0) = 1; sm_disp(0) = 1
rd_disp(0) = 1; rm_disp(0) = 1
DO dst_p = 1, numnodes - 1
sm_disp(dst_p) = sm_disp(dst_p - 1) &
+ metalen*total_send_count(1, dst_p - 1)
sd_disp(dst_p) = sd_disp(dst_p - 1) &
+ total_send_count(2, dst_p - 1)
rm_disp(dst_p) = rm_disp(dst_p - 1) &
+ metalen*total_recv_count(1, dst_p - 1)
rd_disp(dst_p) = rd_disp(dst_p - 1) &
+ total_recv_count(2, dst_p - 1)
END DO
sdp(:) = sd_disp ! sdp points to the the next place to store
! data. It is postincremented.
smp(:) = sm_disp - metalen ! But smp points to the "working" data, not
! the next. It is pre-incremented, so we must
! first rewind it.
CALL dbcsr_data_init(data_block)
CALL dbcsr_data_new(data_block, data_type)
CALL dbcsr_iterator_start(iter, matrix)
dst_p = -1
DO WHILE (dbcsr_iterator_blocks_left(iter))
CALL dbcsr_iterator_next_block(iter, row, col, data_block, tr, blk)
!IF (tr) WRITE(*,*)"block at",row,col," is transposed"
sym_tr = .FALSE.
CALL dbcsr_get_stored_coordinates(redist, &
row, col, dst_p)
smp(dst_p) = smp(dst_p) + metalen
IF (tr) THEN
send_meta(smp(dst_p)) = -row
ELSE
send_meta(smp(dst_p)) = row
END IF
send_meta(smp(dst_p) + 1) = col ! new blocked column
nze = dbcsr_data_get_size(data_block)
CALL dbcsr_data_set(send_data, lb=sdp(dst_p), data_size=nze, &
src=data_block, source_lb=1)
!send_data(sdp(dst_p):sdp(dst_p)+SIZE(r_dp)-1) &
! = r_dp(:)
sdp(dst_p) = sdp(dst_p) + nze
END DO
CALL dbcsr_iterator_stop(iter)
CALL dbcsr_data_clear_pointer(data_block)
! Exchange the data and metadata structures.
SELECT CASE (data_type)
CASE (dbcsr_type_real_4)
CALL hybrid_alltoall_s1( &
send_data%d%r_sp(:), total_send_count(2, :), sd_disp(:) - 1, &
recv_data%d%r_sp(:), total_recv_count(2, :), rd_disp(:) - 1, &
mp_obj_new)
CASE (dbcsr_type_real_8)
!CALL mp_alltoall(&
! send_data%d%r_dp(:), total_send_count(2,:), sd_disp(:)-1,&
! recv_data%d%r_dp(:), total_recv_count(2,:), rd_disp(:)-1,&
! mp_group)
CALL hybrid_alltoall_d1( &
send_data%d%r_dp(:), total_send_count(2, :), sd_disp(:) - 1, &
recv_data%d%r_dp(:), total_recv_count(2, :), rd_disp(:) - 1, &
mp_obj_new)
CASE (dbcsr_type_complex_4)
CALL hybrid_alltoall_c1( &
send_data%d%c_sp(:), total_send_count(2, :), sd_disp(:) - 1, &
recv_data%d%c_sp(:), total_recv_count(2, :), rd_disp(:) - 1, &
mp_obj_new)
CASE (dbcsr_type_complex_8)
CALL hybrid_alltoall_z1( &
send_data%d%c_dp(:), total_send_count(2, :), sd_disp(:) - 1, &
recv_data%d%c_dp(:), total_recv_count(2, :), rd_disp(:) - 1, &
mp_obj_new)
END SELECT
!CALL mp_alltoall(send_data(:), total_send_count(2,:), sd_disp(:)-1,&
! recv_data(:), total_recv_count(2,:), rd_disp(:)-1, mp_group)
CALL hybrid_alltoall_i1(send_meta(:), metalen*total_send_count(1, :), sm_disp(:) - 1, &
recv_meta(:), metalen*total_recv_count(1, :), rm_disp(:) - 1, mp_obj_new)
! Now fill in the data.
CALL dbcsr_work_create(redist, &
SUM(recv_count(1, :)), &
SUM(recv_count(2, :)), work_mutable=.FALSE., n=1)
!
blk_ps = 1
blks = 0
DO src_p = 0, numnodes - 1
!data_offset_l = rd_disp(src_p)
DO meta_l = 1, recv_count(1, src_p)
row = recv_meta(rm_disp(src_p) + metalen*(meta_l - 1))
tr = row .LT. 0
stored_row_new = ABS(row)
stored_col_new = recv_meta(rm_disp(src_p) + metalen*(meta_l - 1) + 1)
nze = row_blk_size_new(stored_row_new)*col_blk_size_new(stored_col_new)
!r_dp => recv_data(blk_ps:blk_ps+nze-1)
!CALL dbcsr_put_block(redist, stored_row_new, stored_col_new, r_dp, tr)
!### this should be changed to be like the make images (i.e., copy data in finalize, not here & now)
data_block = pointer_view(data_block, recv_data, blk_ps, nze)
CALL dbcsr_put_block(redist, stored_row_new, stored_col_new, data_block, transposed=tr)
blk_ps = blk_ps + nze
blks = blks + 1
END DO
END DO
CALL dbcsr_data_clear_pointer(data_block)
CALL dbcsr_data_release(data_block)
!
IF (dbg) THEN
WRITE (*, *) routineN//" Declared blocks=", redist%wms(1)%lastblk, &
"actual=", blks
WRITE (*, *) routineN//" Declared data size=", redist%wms(1)%datasize, &
"actual=", blk_ps
END IF
CALL dbcsr_finalize(redist)
CALL dbcsr_data_release(recv_data)
CALL dbcsr_data_release(send_data)
DEALLOCATE (send_count)
DEALLOCATE (recv_count)
DEALLOCATE (sdp); DEALLOCATE (sd_disp)
DEALLOCATE (smp); DEALLOCATE (sm_disp)
DEALLOCATE (rd_disp)
DEALLOCATE (rm_disp)
DEALLOCATE (recv_meta)
DEALLOCATE (send_meta)
CALL timestop(handle)
END SUBROUTINE dbcsr_redistribute