Splits the given communicator into subgroups, trying to organize them so that communication within each subgroup is efficient (but not necessarily communication between subgroups).
At least one of subgroup_min_size and n_subgroups must be given; the other defaults to the value needed to use the most processors. If fewer CPUs are present than needed for subgroup_min_size or n_subgroups, a single communicator is created that contains all CPUs.
Type | Intent | Optional | Attributes | Name | Description
---|---|---|---|---|---
integer | in | | | comm | the MPI communicator to split
integer | out | | | sub_comm | the communicator for the subgroup (created; needs to be freed later)
integer | out | | | ngroups | the actual number of groups created
integer | | | DIMENSION(0:) | group_distribution | on input, allocated with nprocs entries (0..nprocs-1); on output, entry i holds the group assigned to rank i
integer | in | optional | | subgroup_min_size | the minimum size of each subgroup
integer | in | optional | | n_subgroups | the number of subgroups wanted
integer | | optional | DIMENSION(0:) | group_partition | array of size n_subgroups (0..ngroups-1) with the number of CPUs wanted per group; entries must be positive and sum to the total number of CPUs (used only if consistent)
integer | | optional | | stride | create groups using a stride (default 1) through the ranks of the communicator to be split
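A minimal usage sketch, assuming an initialized MPI environment; mp_environ is the routine from the source below, and mp_comm_free is assumed to be this module's routine for releasing a created communicator (the variable names are illustrative):

INTEGER :: comm, sub_comm, ngroups, nnodes, mepos
INTEGER, ALLOCATABLE, DIMENSION(:) :: group_distribution

! comm is assumed to hold an existing communicator from this module
CALL mp_environ(nnodes, mepos, comm) ! size of comm and rank of this process
ALLOCATE (group_distribution(0:nnodes - 1)) ! one entry per rank of comm
! split comm into subgroups of at least 4 ranks each
CALL mp_comm_split(comm, sub_comm, ngroups, group_distribution, subgroup_min_size=4)
! ... work within sub_comm; group_distribution(mepos) is this rank's group ...
CALL mp_comm_free(sub_comm) ! the created communicator needs to be freed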
SUBROUTINE mp_comm_split(comm, sub_comm, ngroups, group_distribution, &
subgroup_min_size, n_subgroups, group_partition, stride)
!! Splits the given communicator into subgroups, trying to organize
!! them so that communication within each subgroup is
!! efficient (but not necessarily communication between subgroups).
!! @note
!! At least one of subgroup_min_size and n_subgroups must be given;
!! the other defaults to the value needed to use the most processors.
!! If fewer CPUs are present than needed for subgroup_min_size or n_subgroups,
!! a single communicator is created that contains all CPUs.
INTEGER, INTENT(in) :: comm
!! the mpi communicator that you want to split
INTEGER, INTENT(out) :: sub_comm, ngroups
!! sub_comm: the communicator for the subgroup (created, needs to be freed later);
!! ngroups: the actual number of groups created
INTEGER, DIMENSION(0:) :: group_distribution
!! on input: allocated with nprocs entries (0..nprocs-1);
!! on output: entry i holds the group assigned to rank i
INTEGER, INTENT(in), OPTIONAL :: subgroup_min_size, n_subgroups
!! subgroup_min_size: the minimum size of each subgroup;
!! n_subgroups: the number of subgroups wanted
INTEGER, DIMENSION(0:), OPTIONAL :: group_partition
!! n_subgroups-sized array (0..ngroups-1) with the number of CPUs wanted per group;
!! entries must be positive and sum to the total number of CPUs (used only if present and consistent)
INTEGER, OPTIONAL :: stride
!! create groups using a stride (default=1) through the ranks of the comm to be split.
CHARACTER(LEN=*), PARAMETER :: routineN = 'mp_comm_split', routineP = moduleN//':'//routineN
INTEGER :: handle, ierr, mepos, nnodes
#if defined(__parallel)
INTEGER :: color, i, j, k, &
my_subgroup_min_size, &
istride, local_stride, irank
INTEGER, DIMENSION(:), ALLOCATABLE :: rank_permutation
#endif
ierr = 0
CALL timeset(routineN, handle)
IF (.NOT. PRESENT(subgroup_min_size) .AND. .NOT. PRESENT(n_subgroups)) THEN
DBCSR_ABORT(routineP//" missing arguments")
END IF
IF (PRESENT(subgroup_min_size) .AND. PRESENT(n_subgroups)) THEN
DBCSR_ABORT(routineP//" too many arguments")
END IF
CALL mp_environ(nnodes, mepos, comm)
IF (UBOUND(group_distribution, 1) .NE. nnodes - 1) THEN
DBCSR_ABORT(routineP//" group_distribution wrong bounds")
END IF
#if defined(__parallel)
IF (PRESENT(subgroup_min_size)) THEN
IF (subgroup_min_size < 0 .OR. subgroup_min_size > nnodes) THEN
DBCSR_ABORT(routineP//" subgroup_min_size too small or too large")
END IF
ngroups = nnodes/subgroup_min_size
my_subgroup_min_size = subgroup_min_size
ELSE ! n_subgroups
IF (n_subgroups <= 0) THEN
DBCSR_ABORT(routineP//" n_subgroups too small")
END IF
IF (nnodes/n_subgroups > 0) THEN ! we have at least one cpu per group
ngroups = n_subgroups
ELSE ! well, only one group then
ngroups = 1
END IF
my_subgroup_min_size = nnodes/ngroups
END IF
! rank_permutation is a permutation of the ranks, so that groups are not necessarily contiguous in the ranks of the parent group.
! While the order is not critical (we only color ranks), it can e.g. be used to make groups that have just 1 rank per node
! (by setting stride equal to the number of MPI ranks per node), or to share a node between two groups (stride 2).
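! e.g. nnodes=8, stride=2 gives rank_permutation = (/0, 2, 4, 6, 1, 3, 5, 7/),
! i.e. consecutive group slots are filled with every second rank of comm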
ALLOCATE (rank_permutation(0:nnodes - 1))
local_stride = 1
IF (PRESENT(stride)) local_stride = stride
k = 0
DO istride = 1, local_stride
DO irank = istride - 1, nnodes - 1, local_stride
rank_permutation(k) = irank
k = k + 1
END DO
END DO
DO i = 0, nnodes - 1
group_distribution(rank_permutation(i)) = MIN(i/my_subgroup_min_size, ngroups - 1)
END DO
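! e.g. nnodes=8, subgroup_min_size=4, stride=2 yields group_distribution with
! color 0 on ranks 0,2,4,6 and color 1 on ranks 1,3,5,7; the MIN() above folds
! any leftover ranks into the last group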
! if the user gave a partition, see if we can use it to override this choice
IF (PRESENT(group_partition)) THEN
IF (ALL(group_partition > 0) .AND. (SUM(group_partition) .EQ. nnodes) .AND. (ngroups == SIZE(group_partition))) THEN
k = 0
DO i = 0, SIZE(group_partition) - 1
DO j = 1, group_partition(i)
group_distribution(rank_permutation(k)) = i
k = k + 1
END DO
END DO
ELSE
! silently ignore an invalid partition, as we have reasonable defaults (though a warning would probably not be too bad)
END IF
END IF
color = group_distribution(mepos)
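! key 0: within each subgroup, ranks keep the relative order they had in comm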
CALL mpi_comm_split(comm, color, 0, sub_comm, ierr)
debug_comm_count = debug_comm_count + 1
IF (ierr /= mpi_success) CALL mp_stop(ierr, "in "//routineP//" split")
#else
CALL mp_comm_dup(comm, sub_comm)
group_distribution(0) = 0
ngroups = 1
MARK_USED(stride)
MARK_USED(group_partition)
#endif
CALL timestop(handle)
END SUBROUTINE mp_comm_split
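When the subgroups should have unequal sizes, group_partition can prescribe them explicitly. A hedged sketch under the same assumptions as the first example (the 6-rank setup is illustrative; as the source shows, an inconsistent partition is silently ignored in favor of the default distribution):

INTEGER :: comm, sub_comm, ngroups
INTEGER, DIMENSION(0:5) :: group_distribution ! comm is assumed to have 6 ranks
INTEGER, DIMENSION(0:1) :: group_partition = (/2, 4/) ! sizes must sum to 6

CALL mp_comm_split(comm, sub_comm, ngroups, group_distribution, &
                   n_subgroups=2, group_partition=group_partition)
! with the default stride of 1, ranks 0-1 form group 0 and ranks 2-5 form group 1
CALL mp_comm_free(sub_comm)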