Redistributes a DBCSR matrix. The new distribution should have compatible row and column blocks.
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(dbcsr_type), | intent(in) | :: | matrix |
matrix to redistribute |
||
type(dbcsr_type), | intent(inout) | :: | redist |
redistributed matrix, which should already be created |
SUBROUTINE dbcsr_redistribute(matrix, redist) !! Redistributes a DBCSR matrix. !! The new distribution should have compatible row and column blocks. TYPE(dbcsr_type), INTENT(IN) :: matrix !! matrix to redistribute TYPE(dbcsr_type), INTENT(INOUT) :: redist !! redistributed matrix, which should already be created CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_redistribute' INTEGER, PARAMETER :: metalen = 2 LOGICAL, PARAMETER :: dbg = .FALSE. INTEGER :: blk, blk_ps, blks, col, col_size, data_type, dst_p, handle, meta_l, & numnodes, nze, row, row_size, src_p, stored_col_new, stored_row_new INTEGER, ALLOCATABLE, DIMENSION(:) :: rd_disp, recv_meta, rm_disp, sd_disp, & sdp, send_meta, sm_disp, smp INTEGER, ALLOCATABLE, DIMENSION(:, :) :: recv_count, send_count, & total_recv_count, total_send_count INTEGER, DIMENSION(:), POINTER :: col_blk_size_new, col_dist_new, & row_blk_size_new, row_dist_new INTEGER, DIMENSION(:, :), POINTER :: pgrid LOGICAL :: sym_tr, tr TYPE(dbcsr_data_obj) :: data_block, recv_data, send_data TYPE(dbcsr_distribution_obj) :: dist_new TYPE(dbcsr_iterator) :: iter TYPE(dbcsr_mp_obj) :: mp_obj_new TYPE(mp_comm_type) :: mp_group ! --------------------------------------------------------------------------- CALL timeset(routineN, handle) !call dbcsr_print_dist (matrix%dist) !call dbcsr_print_dist (redist%dist) IF (.NOT. dbcsr_valid_index(matrix)) & DBCSR_ABORT("Input not valid.") IF (matrix%replication_type .NE. dbcsr_repl_none) & DBCSR_WARN("Can not redistribute replicated matrix.") data_type = matrix%data_type ! Get row and column start and end positions ! Old matrix ! New matrix dist_new = dbcsr_distribution(redist) row_blk_size_new => array_data(redist%row_blk_size) col_blk_size_new => array_data(redist%col_blk_size) row_dist_new => dbcsr_distribution_row_dist(dist_new) col_dist_new => dbcsr_distribution_col_dist(dist_new) ! mp_obj_new = dbcsr_distribution_mp(dist_new) pgrid => dbcsr_mp_pgrid(mp_obj_new) numnodes = dbcsr_mp_numnodes(mp_obj_new) mp_group = dbcsr_mp_group(mp_obj_new) ! IF (MAXVAL(row_dist_new) .GT. UBOUND(pgrid, 1)) & DBCSR_ABORT('Row distribution references unexistent processor rows') IF (dbg) THEN IF (MAXVAL(row_dist_new) .NE. UBOUND(pgrid, 1)) & DBCSR_WARN('Range of row distribution not equal to processor rows') END IF IF (MAXVAL(col_dist_new) .GT. UBOUND(pgrid, 2)) & DBCSR_ABORT('Col distribution references unexistent processor cols') IF (dbg) THEN IF (MAXVAL(col_dist_new) .NE. UBOUND(pgrid, 2)) & DBCSR_WARN('Range of col distribution not equal to processor cols') END IF ALLOCATE (send_count(2, 0:numnodes - 1)) ALLOCATE (recv_count(2, 0:numnodes - 1)) ALLOCATE (total_send_count(2, 0:numnodes - 1)) ALLOCATE (total_recv_count(2, 0:numnodes - 1)) ALLOCATE (sdp(0:numnodes - 1)) ALLOCATE (sd_disp(0:numnodes - 1)) ALLOCATE (smp(0:numnodes - 1)) ALLOCATE (sm_disp(0:numnodes - 1)) ALLOCATE (rd_disp(0:numnodes - 1)) ALLOCATE (rm_disp(0:numnodes - 1)) ! Count initial sizes for sending. ! send_count(:, :) = 0 CALL dbcsr_iterator_start(iter, matrix) dst_p = -1 DO WHILE (dbcsr_iterator_blocks_left(iter)) CALL dbcsr_iterator_next_block(iter, row, col, blk, tr, & row_size=row_size, col_size=col_size) sym_tr = .FALSE. CALL dbcsr_get_stored_coordinates(redist, & row, col, dst_p) nze = row_size*col_size send_count(1, dst_p) = send_count(1, dst_p) + 1 send_count(2, dst_p) = send_count(2, dst_p) + nze END DO CALL dbcsr_iterator_stop(iter) CALL mp_alltoall(send_count, recv_count, 2, mp_group) ! Allocate data structures needed for data exchange. CALL dbcsr_data_init(recv_data) CALL dbcsr_data_new(recv_data, data_type, SUM(recv_count(2, :))) ALLOCATE (recv_meta(metalen*SUM(recv_count(1, :)))) CALL dbcsr_data_init(send_data) CALL dbcsr_data_new(send_data, data_type, SUM(send_count(2, :))) ALLOCATE (send_meta(metalen*SUM(send_count(1, :)))) ! Fill in the meta data structures and copy the data. DO dst_p = 0, numnodes - 1 total_send_count(1, dst_p) = send_count(1, dst_p) total_send_count(2, dst_p) = send_count(2, dst_p) total_recv_count(1, dst_p) = recv_count(1, dst_p) total_recv_count(2, dst_p) = recv_count(2, dst_p) END DO sd_disp = -1; sm_disp = -1 rd_disp = -1; rm_disp = -1 sd_disp(0) = 1; sm_disp(0) = 1 rd_disp(0) = 1; rm_disp(0) = 1 DO dst_p = 1, numnodes - 1 sm_disp(dst_p) = sm_disp(dst_p - 1) & + metalen*total_send_count(1, dst_p - 1) sd_disp(dst_p) = sd_disp(dst_p - 1) & + total_send_count(2, dst_p - 1) rm_disp(dst_p) = rm_disp(dst_p - 1) & + metalen*total_recv_count(1, dst_p - 1) rd_disp(dst_p) = rd_disp(dst_p - 1) & + total_recv_count(2, dst_p - 1) END DO sdp(:) = sd_disp ! sdp points to the the next place to store ! data. It is postincremented. smp(:) = sm_disp - metalen ! But smp points to the "working" data, not ! the next. It is pre-incremented, so we must ! first rewind it. CALL dbcsr_data_init(data_block) CALL dbcsr_data_new(data_block, data_type) CALL dbcsr_iterator_start(iter, matrix) dst_p = -1 DO WHILE (dbcsr_iterator_blocks_left(iter)) CALL dbcsr_iterator_next_block(iter, row, col, data_block, tr, blk) !IF (tr) WRITE(*,*)"block at",row,col," is transposed" sym_tr = .FALSE. CALL dbcsr_get_stored_coordinates(redist, & row, col, dst_p) smp(dst_p) = smp(dst_p) + metalen IF (tr) THEN send_meta(smp(dst_p)) = -row ELSE send_meta(smp(dst_p)) = row END IF send_meta(smp(dst_p) + 1) = col ! new blocked column nze = dbcsr_data_get_size(data_block) CALL dbcsr_data_set(send_data, lb=sdp(dst_p), data_size=nze, & src=data_block, source_lb=1) !send_data(sdp(dst_p):sdp(dst_p)+SIZE(r_dp)-1) & ! = r_dp(:) sdp(dst_p) = sdp(dst_p) + nze END DO CALL dbcsr_iterator_stop(iter) CALL dbcsr_data_clear_pointer(data_block) ! Exchange the data and metadata structures. SELECT CASE (data_type) CASE (dbcsr_type_real_4) CALL hybrid_alltoall_s1( & send_data%d%r_sp(:), total_send_count(2, :), sd_disp(:) - 1, & recv_data%d%r_sp(:), total_recv_count(2, :), rd_disp(:) - 1, & mp_obj_new) CASE (dbcsr_type_real_8) !CALL mp_alltoall(& ! send_data%d%r_dp(:), total_send_count(2,:), sd_disp(:)-1,& ! recv_data%d%r_dp(:), total_recv_count(2,:), rd_disp(:)-1,& ! mp_group) CALL hybrid_alltoall_d1( & send_data%d%r_dp(:), total_send_count(2, :), sd_disp(:) - 1, & recv_data%d%r_dp(:), total_recv_count(2, :), rd_disp(:) - 1, & mp_obj_new) CASE (dbcsr_type_complex_4) CALL hybrid_alltoall_c1( & send_data%d%c_sp(:), total_send_count(2, :), sd_disp(:) - 1, & recv_data%d%c_sp(:), total_recv_count(2, :), rd_disp(:) - 1, & mp_obj_new) CASE (dbcsr_type_complex_8) CALL hybrid_alltoall_z1( & send_data%d%c_dp(:), total_send_count(2, :), sd_disp(:) - 1, & recv_data%d%c_dp(:), total_recv_count(2, :), rd_disp(:) - 1, & mp_obj_new) END SELECT !CALL mp_alltoall(send_data(:), total_send_count(2,:), sd_disp(:)-1,& ! recv_data(:), total_recv_count(2,:), rd_disp(:)-1, mp_group) CALL hybrid_alltoall_i1(send_meta(:), metalen*total_send_count(1, :), sm_disp(:) - 1, & recv_meta(:), metalen*total_recv_count(1, :), rm_disp(:) - 1, mp_obj_new) ! Now fill in the data. CALL dbcsr_work_create(redist, & SUM(recv_count(1, :)), & SUM(recv_count(2, :)), work_mutable=.FALSE., n=1) ! blk_ps = 1 blks = 0 DO src_p = 0, numnodes - 1 !data_offset_l = rd_disp(src_p) DO meta_l = 1, recv_count(1, src_p) row = recv_meta(rm_disp(src_p) + metalen*(meta_l - 1)) tr = row .LT. 0 stored_row_new = ABS(row) stored_col_new = recv_meta(rm_disp(src_p) + metalen*(meta_l - 1) + 1) nze = row_blk_size_new(stored_row_new)*col_blk_size_new(stored_col_new) !r_dp => recv_data(blk_ps:blk_ps+nze-1) !CALL dbcsr_put_block(redist, stored_row_new, stored_col_new, r_dp, tr) !### this should be changed to be like the make images (i.e., copy data in finalize, not here & now) data_block = pointer_view(data_block, recv_data, blk_ps, nze) CALL dbcsr_put_block(redist, stored_row_new, stored_col_new, data_block, transposed=tr) blk_ps = blk_ps + nze blks = blks + 1 END DO END DO CALL dbcsr_data_clear_pointer(data_block) CALL dbcsr_data_release(data_block) ! IF (dbg) THEN WRITE (*, *) routineN//" Declared blocks=", redist%wms(1)%lastblk, & "actual=", blks WRITE (*, *) routineN//" Declared data size=", redist%wms(1)%datasize, & "actual=", blk_ps END IF CALL dbcsr_finalize(redist) CALL dbcsr_data_release(recv_data) CALL dbcsr_data_release(send_data) DEALLOCATE (send_count) DEALLOCATE (recv_count) DEALLOCATE (sdp); DEALLOCATE (sd_disp) DEALLOCATE (smp); DEALLOCATE (sm_disp) DEALLOCATE (rd_disp) DEALLOCATE (rm_disp) DEALLOCATE (recv_meta) DEALLOCATE (send_meta) CALL timestop(handle) END SUBROUTINE dbcsr_redistribute