Replicates a DBCSR matrix among process rows and columns
Direction definition Row replication means that all processors in a process grid sharing the same row get the data of the entire row. (In a 1-column grid the operation has no effect.) Similar logic applies to column replication.
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(dbcsr_type), | intent(inout) | :: | matrix |
matrix to replicate |
||
logical, | intent(in) | :: | replicate_rows |
Row should be replicated among all processors Column should be replicated among all processors |
||
logical, | intent(in) | :: | replicate_columns |
Row should be replicated among all processors Column should be replicated among all processors |
||
integer, | intent(in), | optional | :: | restrict_source |
Send only from this node (ignores blocks on other nodes) |
SUBROUTINE dbcsr_replicate(matrix, replicate_rows, replicate_columns, & restrict_source) !! Replicates a DBCSR matrix among process rows and columns !! !! Direction definition !! Row replication means that all processors in a process grid sharing !! the same row get the data of the entire row. (In a 1-column grid the !! operation has no effect.) Similar logic applies to column replication. TYPE(dbcsr_type), INTENT(INOUT) :: matrix !! matrix to replicate LOGICAL, INTENT(IN) :: replicate_rows, replicate_columns !! Row should be replicated among all processors !! Column should be replicated among all processors INTEGER, INTENT(IN), OPTIONAL :: restrict_source !! Send only from this node (ignores blocks on other nodes) CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_replicate' INTEGER, PARAMETER :: metalen = 3 LOGICAL, PARAMETER :: dbg = .FALSE. CHARACTER :: rep_type INTEGER :: blk, blk_l, blk_p, blk_ps, blks, col, col_size, data_type, dst_p, handle, & mynode, mypcol, myprow, nblks, numnodes, nze, offset, row, row_size, smp, & src_p, stored_blk_p, stored_col, stored_row INTEGER, ALLOCATABLE, DIMENSION(:) :: rd_disp, recv_meta, rm_disp, send_meta INTEGER, ALLOCATABLE, DIMENSION(:, :) :: recv_count INTEGER, DIMENSION(2) :: send_count INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: col_blk_size, col_dist, row_blk_size, & row_dist, tmp_index INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi LOGICAL :: had_subcomms, i_am_restricted, rest_src, & tr TYPE(dbcsr_data_obj) :: data_block, recv_data, send_data, & tmp_data TYPE(dbcsr_distribution_obj) :: target_dist TYPE(dbcsr_iterator) :: iter TYPE(dbcsr_mp_obj) :: mp_obj TYPE(dbcsr_type) :: replicated TYPE(mp_comm_type) :: mp_group ! --------------------------------------------------------------------------- CALL timeset(routineN, handle) IF (.NOT. dbcsr_valid_index(matrix)) & DBCSR_ABORT("Matrix not initialized.") !IF(matrix%replication_type /= dbcsr_repl_none) & ! DBCSR_WARN("Replicating a non-distributed matrix makes no sense.") IF (replicate_rows .AND. replicate_columns) THEN rep_type = dbcsr_repl_full ELSEIF (replicate_rows .AND. .NOT. replicate_columns) THEN rep_type = dbcsr_repl_row ELSEIF (replicate_columns .AND. .NOT. replicate_rows) THEN rep_type = dbcsr_repl_col ELSE rep_type = dbcsr_repl_none IF (.NOT. replicate_rows .AND. .NOT. replicate_columns) & DBCSR_ABORT("Some replication must be specified") END IF data_type = dbcsr_get_data_type(matrix) row_blk_size => array_data(matrix%row_blk_size) col_blk_size => array_data(matrix%col_blk_size) target_dist = matrix%dist row_dist => dbcsr_distribution_row_dist(target_dist) col_dist => dbcsr_distribution_col_dist(target_dist) mp_obj = dbcsr_distribution_mp(target_dist) blacs2mpi => dbcsr_mp_pgrid(mp_obj) numnodes = dbcsr_mp_numnodes(mp_obj) mynode = dbcsr_mp_mynode(mp_obj) myprow = dbcsr_mp_myprow(mp_obj) mypcol = dbcsr_mp_mypcol(mp_obj) IF (MAXVAL(row_dist) .GT. UBOUND(blacs2mpi, 1)) & DBCSR_ABORT('Row distribution references unexistent processor rows') IF (dbg) THEN IF (MAXVAL(row_dist) .NE. UBOUND(blacs2mpi, 1)) & DBCSR_WARN('Range of row distribution not equal to processor rows') END IF IF (MAXVAL(col_dist) .GT. UBOUND(blacs2mpi, 2)) & DBCSR_ABORT('Col distribution references unexistent processor cols') IF (dbg) THEN IF (MAXVAL(col_dist) .NE. UBOUND(blacs2mpi, 2)) & DBCSR_WARN('Range of col distribution not equal to processor cols') END IF ! Define the number of nodes with which I will communicate. Also ! setup row and column communicators. had_subcomms = dbcsr_mp_has_subgroups(mp_obj) SELECT CASE (rep_type) CASE (dbcsr_repl_full) numnodes = dbcsr_mp_numnodes(mp_obj) mp_group = dbcsr_mp_group(mp_obj) mynode = dbcsr_mp_mynode(mp_obj) CASE (dbcsr_repl_row) numnodes = dbcsr_mp_npcols(mp_obj) CALL dbcsr_mp_grid_setup(mp_obj) mp_group = dbcsr_mp_my_row_group(mp_obj) mynode = dbcsr_mp_mypcol(mp_obj) CASE (dbcsr_repl_col) numnodes = dbcsr_mp_nprows(mp_obj) CALL dbcsr_mp_grid_setup(mp_obj) mp_group = dbcsr_mp_my_col_group(mp_obj) mynode = dbcsr_mp_myprow(mp_obj) CASE (dbcsr_repl_none) numnodes = 1 mp_group = dbcsr_mp_group(mp_obj) mynode = 0 END SELECT IF ((rep_type == dbcsr_repl_row .OR. rep_type == dbcsr_repl_col) .AND. .NOT. dbcsr_mp_has_subgroups(mp_obj)) & DBCSR_ABORT("Only full replication supported when subcommunicators are turned off.") ! IF (PRESENT(restrict_source)) THEN rest_src = .TRUE. i_am_restricted = mynode .NE. restrict_source ELSE rest_src = .FALSE. i_am_restricted = .FALSE. END IF ! ALLOCATE (recv_count(2, 0:numnodes - 1)) ALLOCATE (rd_disp(0:numnodes - 1)) ALLOCATE (rm_disp(0:numnodes - 1)) CALL dbcsr_create(replicated, name='Replicated '//TRIM(matrix%name), & template=matrix, & matrix_type=dbcsr_type_no_symmetry, & replication_type=rep_type) ! ! Count initial sizes for sending. Also, ensure that blocks are on their ! home processors. send_count(1:2) = 0 CALL dbcsr_iterator_start(iter, matrix) IF (.NOT. i_am_restricted) THEN DO WHILE (dbcsr_iterator_blocks_left(iter)) CALL dbcsr_iterator_next_block(iter, row, col, & row_size=row_size, col_size=col_size, blk=blk) !tr = .FALSE. !CALL dbcsr_get_stored_coordinates (matrix, row, col, tr, dst_p) !IF(dst_p /= mynode) & ! DBCSR_ABORT("Matrix is not correctly distributed. Call dbcsr_redistribute.") nze = row_size*col_size send_count(1) = send_count(1) + 1 send_count(2) = send_count(2) + nze END DO send_count(2) = dbcsr_data_get_size_referenced(matrix%data_area) END IF CALL dbcsr_iterator_stop(iter) ! Exchange how much data others have. CALL mp_allgather(send_count(1:2), recv_count, mp_group) CALL dbcsr_data_init(recv_data) nze = SUM(recv_count(2, :)) nblks = SUM(recv_count(1, :)) CALL dbcsr_data_new(recv_data, data_type=data_type, data_size=nze) ! send_data should have the correct size CALL dbcsr_data_init(send_data) IF (send_count(2) .EQ. 0) THEN CALL dbcsr_data_new(send_data, data_type=data_type, data_size=0) ELSE CALL dbcsr_data_new(send_data, data_type=data_type) send_data = pointer_view(send_data, matrix%data_area, 1, send_count(2)) END IF ALLOCATE (recv_meta(metalen*nblks)) ALLOCATE (send_meta(metalen*send_count(1))) recv_meta(:) = 0 ! Fill in the meta data structures and copy the data. rd_disp = -1; rm_disp = -1 rd_disp(0) = 1; rm_disp(0) = 1 DO dst_p = 1, numnodes - 1 rm_disp(dst_p) = rm_disp(dst_p - 1) & + metalen*recv_count(1, dst_p - 1) rd_disp(dst_p) = rd_disp(dst_p - 1) & + recv_count(2, dst_p - 1) END DO CALL dbcsr_data_init(data_block) CALL dbcsr_data_new(data_block, data_type=data_type) CALL dbcsr_iterator_start(iter, matrix) smp = 1 IF (.NOT. i_am_restricted) THEN DO WHILE (dbcsr_iterator_blocks_left(iter)) CALL dbcsr_iterator_next_block(iter, row, col, blk, & transposed=tr, blk_p=blk_p) send_meta(smp + 0) = row send_meta(smp + 1) = col send_meta(smp + 2) = blk_p smp = smp + metalen END DO END IF CALL dbcsr_iterator_stop(iter) CALL dbcsr_data_clear_pointer(data_block) CALL dbcsr_data_release(data_block) ! Exchange the data and metadata structures. CALL mp_allgather(send_meta, recv_meta, metalen*recv_count(1, :), & rm_disp - 1, mp_group) CALL dbcsr_allgatherv(send_data, dbcsr_data_get_size(send_data), & recv_data, recv_count(2, :), & rd_disp - 1, mp_group) ! Release the send buffer. If it had a non-zero size then it was a ! pointer into the regular matrix and the data pointer should be ! cleared and not deallocated. IF (send_count(2) .NE. 0) THEN CALL dbcsr_data_clear_pointer(send_data) END IF CALL dbcsr_data_release(send_data) ! ! Now fill in the data. CALL dbcsr_work_create(replicated, & SUM(recv_count(1, :)), & SUM(recv_count(2, :)), n=1, & work_mutable=.FALSE.) CALL dbcsr_data_hold(recv_data) CALL dbcsr_data_release(replicated%wms(1)%data_area) replicated%wms(1)%data_area = recv_data blk_ps = 1 blks = 1 DO src_p = 0, numnodes - 1 nze = recv_count(2, src_p) !CALL dbcsr_data_set (replicated%wms(1)%data_area, blk_ps, nze,& ! recv_data, rd_disp(src_p)) offset = rd_disp(src_p) - 1 DO blk_l = 1, recv_count(1, src_p) IF (dbg) WRITE (*, *) "src_p, blk_l", src_p, blk_l stored_row = recv_meta(rm_disp(src_p) + metalen*(blk_l - 1)) stored_col = recv_meta(rm_disp(src_p) + metalen*(blk_l - 1) + 1) stored_blk_p = recv_meta(rm_disp(src_p) + metalen*(blk_l - 1) + 2) replicated%wms(1)%row_i(blks) = stored_row replicated%wms(1)%col_i(blks) = stored_col replicated%wms(1)%blk_p(blks) = SIGN(ABS(stored_blk_p) + offset, & stored_blk_p) nze = row_blk_size(stored_row) & *col_blk_size(stored_col) blk_ps = MAX(blk_ps, ABS(stored_blk_p) + nze + offset) blks = blks + 1 END DO END DO CALL dbcsr_data_set_size_referenced(replicated%wms(1)%data_area, blk_ps - 1) ! replicated%wms(1)%lastblk = blks - 1 replicated%wms(1)%datasize = blk_ps - 1 CALL dbcsr_finalize(replicated, reshuffle=.TRUE.) ! ! Remove communicators if they were forcibly created. IF (had_subcomms .AND. & (rep_type .EQ. dbcsr_repl_row .OR. rep_type .EQ. dbcsr_repl_col)) THEN CALL dbcsr_mp_grid_remove(mp_obj) END IF DEALLOCATE (recv_count) DEALLOCATE (rd_disp) DEALLOCATE (rm_disp) CALL dbcsr_data_release(recv_data) DEALLOCATE (recv_meta) DEALLOCATE (send_meta) matrix%replication_type = replicated%replication_type ! Now replace the data and index CALL dbcsr_switch_data_area(matrix, replicated%data_area, & previous_data_area=tmp_data) CALL dbcsr_switch_data_area(replicated, tmp_data) CALL dbcsr_data_release(tmp_data) tmp_index => matrix%index matrix%index => replicated%index replicated%index => tmp_index CALL dbcsr_repoint_index(matrix) matrix%nze = replicated%nze matrix%nblks = replicated%nblks CALL dbcsr_release(replicated) CALL dbcsr_verify_matrix(matrix) CALL timestop(handle) END SUBROUTINE dbcsr_replicate