Replicates a DBCSR matrix among process rows and columns
Direction definition Row replication means that all processors in a process grid sharing the same row get the data of the entire row. (In a 1-column grid the operation has no effect.) Similar logic applies to column replication.
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(dbcsr_type), | intent(inout) | :: | matrix |
matrix to replicate |
||
logical, | intent(in) | :: | replicate_rows |
Row should be replicated among all processors Column should be replicated among all processors |
||
logical, | intent(in) | :: | replicate_columns |
Row should be replicated among all processors Column should be replicated among all processors |
||
integer, | intent(in), | optional | :: | restrict_source |
Send only from this node (ignores blocks on other nodes) |
SUBROUTINE dbcsr_replicate(matrix, replicate_rows, replicate_columns, &
restrict_source)
!! Replicates a DBCSR matrix among process rows and columns
!!
!! Direction definition
!! Row replication means that all processors in a process grid sharing
!! the same row get the data of the entire row. (In a 1-column grid the
!! operation has no effect.) Similar logic applies to column replication.
TYPE(dbcsr_type), INTENT(INOUT) :: matrix
!! matrix to replicate
LOGICAL, INTENT(IN) :: replicate_rows, replicate_columns
!! Row should be replicated among all processors
!! Column should be replicated among all processors
INTEGER, INTENT(IN), OPTIONAL :: restrict_source
!! Send only from this node (ignores blocks on other nodes)
CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_replicate'
INTEGER, PARAMETER :: metalen = 3
LOGICAL, PARAMETER :: dbg = .FALSE.
CHARACTER :: rep_type
INTEGER :: blk, blk_l, blk_p, blk_ps, blks, col, col_size, data_type, dst_p, handle, &
mp_group, mynode, mypcol, myprow, nblks, numnodes, nze, offset, row, row_size, smp, &
src_p, stored_blk_p, stored_col, stored_row
INTEGER, ALLOCATABLE, DIMENSION(:) :: rd_disp, recv_meta, rm_disp, send_meta
INTEGER, ALLOCATABLE, DIMENSION(:, :) :: recv_count
INTEGER, DIMENSION(2) :: send_count
INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: col_blk_size, col_dist, row_blk_size, &
row_dist, tmp_index
INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi
LOGICAL :: had_subcomms, i_am_restricted, rest_src, &
tr
TYPE(dbcsr_data_obj) :: data_block, recv_data, send_data, &
tmp_data
TYPE(dbcsr_distribution_obj) :: target_dist
TYPE(dbcsr_iterator) :: iter
TYPE(dbcsr_mp_obj) :: mp_obj
TYPE(dbcsr_type) :: replicated
! ---------------------------------------------------------------------------
CALL timeset(routineN, handle)
IF (.NOT. dbcsr_valid_index(matrix)) &
DBCSR_ABORT("Matrix not initialized.")
!IF(matrix%replication_type /= dbcsr_repl_none) &
! DBCSR_WARN("Replicating a non-distributed matrix makes no sense.")
IF (replicate_rows .AND. replicate_columns) THEN
rep_type = dbcsr_repl_full
ELSEIF (replicate_rows .AND. .NOT. replicate_columns) THEN
rep_type = dbcsr_repl_row
ELSEIF (replicate_columns .AND. .NOT. replicate_rows) THEN
rep_type = dbcsr_repl_col
ELSE
rep_type = dbcsr_repl_none
IF (.NOT. replicate_rows .AND. .NOT. replicate_columns) &
DBCSR_ABORT("Some replication must be specified")
END IF
data_type = dbcsr_get_data_type(matrix)
row_blk_size => array_data(matrix%row_blk_size)
col_blk_size => array_data(matrix%col_blk_size)
target_dist = matrix%dist
row_dist => dbcsr_distribution_row_dist(target_dist)
col_dist => dbcsr_distribution_col_dist(target_dist)
mp_obj = dbcsr_distribution_mp(target_dist)
blacs2mpi => dbcsr_mp_pgrid(mp_obj)
numnodes = dbcsr_mp_numnodes(mp_obj)
mynode = dbcsr_mp_mynode(mp_obj)
myprow = dbcsr_mp_myprow(mp_obj)
mypcol = dbcsr_mp_mypcol(mp_obj)
IF (MAXVAL(row_dist) .GT. UBOUND(blacs2mpi, 1)) &
DBCSR_ABORT('Row distribution references unexistent processor rows')
IF (dbg) THEN
IF (MAXVAL(row_dist) .NE. UBOUND(blacs2mpi, 1)) &
DBCSR_WARN('Range of row distribution not equal to processor rows')
END IF
IF (MAXVAL(col_dist) .GT. UBOUND(blacs2mpi, 2)) &
DBCSR_ABORT('Col distribution references unexistent processor cols')
IF (dbg) THEN
IF (MAXVAL(col_dist) .NE. UBOUND(blacs2mpi, 2)) &
DBCSR_WARN('Range of col distribution not equal to processor cols')
END IF
! Define the number of nodes with which I will communicate. Also
! setup row and column communicators.
had_subcomms = dbcsr_mp_has_subgroups(mp_obj)
SELECT CASE (rep_type)
CASE (dbcsr_repl_full)
numnodes = dbcsr_mp_numnodes(mp_obj)
mp_group = dbcsr_mp_group(mp_obj)
mynode = dbcsr_mp_mynode(mp_obj)
CASE (dbcsr_repl_row)
numnodes = dbcsr_mp_npcols(mp_obj)
CALL dbcsr_mp_grid_setup(mp_obj)
mp_group = dbcsr_mp_my_row_group(mp_obj)
mynode = dbcsr_mp_mypcol(mp_obj)
CASE (dbcsr_repl_col)
numnodes = dbcsr_mp_nprows(mp_obj)
CALL dbcsr_mp_grid_setup(mp_obj)
mp_group = dbcsr_mp_my_col_group(mp_obj)
mynode = dbcsr_mp_myprow(mp_obj)
CASE (dbcsr_repl_none)
numnodes = 1
mp_group = dbcsr_mp_group(mp_obj)
mynode = 0
END SELECT
IF ((rep_type == dbcsr_repl_row .OR. rep_type == dbcsr_repl_col) .AND. .NOT. dbcsr_mp_has_subgroups(mp_obj)) &
DBCSR_ABORT("Only full replication supported when subcommunicators are turned off.")
!
IF (PRESENT(restrict_source)) THEN
rest_src = .TRUE.
i_am_restricted = mynode .NE. restrict_source
ELSE
rest_src = .FALSE.
i_am_restricted = .FALSE.
END IF
!
ALLOCATE (recv_count(2, 0:numnodes - 1))
ALLOCATE (rd_disp(0:numnodes - 1))
ALLOCATE (rm_disp(0:numnodes - 1))
CALL dbcsr_create(replicated, name='Replicated '//TRIM(matrix%name), &
template=matrix, &
matrix_type=dbcsr_type_no_symmetry, &
replication_type=rep_type)
!
! Count initial sizes for sending. Also, ensure that blocks are on their
! home processors.
send_count(1:2) = 0
CALL dbcsr_iterator_start(iter, matrix)
IF (.NOT. i_am_restricted) THEN
DO WHILE (dbcsr_iterator_blocks_left(iter))
CALL dbcsr_iterator_next_block(iter, row, col, &
row_size=row_size, col_size=col_size, blk=blk)
!tr = .FALSE.
!CALL dbcsr_get_stored_coordinates (matrix, row, col, tr, dst_p)
!IF(dst_p /= mynode) &
! DBCSR_ABORT("Matrix is not correctly distributed. Call dbcsr_redistribute.")
nze = row_size*col_size
send_count(1) = send_count(1) + 1
send_count(2) = send_count(2) + nze
END DO
send_count(2) = dbcsr_data_get_size_referenced(matrix%data_area)
END IF
CALL dbcsr_iterator_stop(iter)
! Exchange how much data others have.
CALL mp_allgather(send_count(1:2), recv_count, mp_group)
CALL dbcsr_data_init(recv_data)
nze = SUM(recv_count(2, :))
nblks = SUM(recv_count(1, :))
CALL dbcsr_data_new(recv_data, data_type=data_type, data_size=nze)
! send_data should have the correct size
CALL dbcsr_data_init(send_data)
IF (send_count(2) .EQ. 0) THEN
CALL dbcsr_data_new(send_data, data_type=data_type, data_size=0)
ELSE
CALL dbcsr_data_new(send_data, data_type=data_type)
send_data = pointer_view(send_data, matrix%data_area, 1, send_count(2))
END IF
ALLOCATE (recv_meta(metalen*nblks))
ALLOCATE (send_meta(metalen*send_count(1)))
recv_meta(:) = 0
! Fill in the meta data structures and copy the data.
rd_disp = -1; rm_disp = -1
rd_disp(0) = 1; rm_disp(0) = 1
DO dst_p = 1, numnodes - 1
rm_disp(dst_p) = rm_disp(dst_p - 1) &
+ metalen*recv_count(1, dst_p - 1)
rd_disp(dst_p) = rd_disp(dst_p - 1) &
+ recv_count(2, dst_p - 1)
END DO
CALL dbcsr_data_init(data_block)
CALL dbcsr_data_new(data_block, data_type=data_type)
CALL dbcsr_iterator_start(iter, matrix)
smp = 1
IF (.NOT. i_am_restricted) THEN
DO WHILE (dbcsr_iterator_blocks_left(iter))
CALL dbcsr_iterator_next_block(iter, row, col, blk, &
transposed=tr, blk_p=blk_p)
send_meta(smp + 0) = row
send_meta(smp + 1) = col
send_meta(smp + 2) = blk_p
smp = smp + metalen
END DO
END IF
CALL dbcsr_iterator_stop(iter)
CALL dbcsr_data_clear_pointer(data_block)
CALL dbcsr_data_release(data_block)
! Exchange the data and metadata structures.
CALL mp_allgather(send_meta, recv_meta, metalen*recv_count(1, :), &
rm_disp - 1, mp_group)
CALL dbcsr_allgatherv(send_data, dbcsr_data_get_size(send_data), &
recv_data, recv_count(2, :), &
rd_disp - 1, mp_group)
! Release the send buffer. If it had a non-zero size then it was a
! pointer into the regular matrix and the data pointer should be
! cleared and not deallocated.
IF (send_count(2) .NE. 0) THEN
CALL dbcsr_data_clear_pointer(send_data)
END IF
CALL dbcsr_data_release(send_data)
!
! Now fill in the data.
CALL dbcsr_work_create(replicated, &
SUM(recv_count(1, :)), &
SUM(recv_count(2, :)), n=1, &
work_mutable=.FALSE.)
CALL dbcsr_data_hold(recv_data)
CALL dbcsr_data_release(replicated%wms(1)%data_area)
replicated%wms(1)%data_area = recv_data
blk_ps = 1
blks = 1
DO src_p = 0, numnodes - 1
nze = recv_count(2, src_p)
!CALL dbcsr_data_set (replicated%wms(1)%data_area, blk_ps, nze,&
! recv_data, rd_disp(src_p))
offset = rd_disp(src_p) - 1
DO blk_l = 1, recv_count(1, src_p)
IF (dbg) WRITE (*, *) "src_p, blk_l", src_p, blk_l
stored_row = recv_meta(rm_disp(src_p) + metalen*(blk_l - 1))
stored_col = recv_meta(rm_disp(src_p) + metalen*(blk_l - 1) + 1)
stored_blk_p = recv_meta(rm_disp(src_p) + metalen*(blk_l - 1) + 2)
replicated%wms(1)%row_i(blks) = stored_row
replicated%wms(1)%col_i(blks) = stored_col
replicated%wms(1)%blk_p(blks) = SIGN(ABS(stored_blk_p) + offset, &
stored_blk_p)
nze = row_blk_size(stored_row) &
*col_blk_size(stored_col)
blk_ps = MAX(blk_ps, ABS(stored_blk_p) + nze + offset)
blks = blks + 1
END DO
END DO
CALL dbcsr_data_set_size_referenced(replicated%wms(1)%data_area, blk_ps - 1)
!
replicated%wms(1)%lastblk = blks - 1
replicated%wms(1)%datasize = blk_ps - 1
CALL dbcsr_finalize(replicated, reshuffle=.TRUE.)
!
! Remove communicators if they were forcibly created.
IF (had_subcomms .AND. &
(rep_type .EQ. dbcsr_repl_row .OR. rep_type .EQ. dbcsr_repl_col)) THEN
CALL dbcsr_mp_grid_remove(mp_obj)
END IF
DEALLOCATE (recv_count)
DEALLOCATE (rd_disp)
DEALLOCATE (rm_disp)
CALL dbcsr_data_release(recv_data)
DEALLOCATE (recv_meta)
DEALLOCATE (send_meta)
matrix%replication_type = replicated%replication_type
! Now replace the data and index
CALL dbcsr_switch_data_area(matrix, replicated%data_area, &
previous_data_area=tmp_data)
CALL dbcsr_switch_data_area(replicated, tmp_data)
CALL dbcsr_data_release(tmp_data)
tmp_index => matrix%index
matrix%index => replicated%index
replicated%index => tmp_index
CALL dbcsr_repoint_index(matrix)
matrix%nze = replicated%nze
matrix%nblks = replicated%nblks
CALL dbcsr_release(replicated)
CALL dbcsr_verify_matrix(matrix)
CALL timestop(handle)
END SUBROUTINE dbcsr_replicate