Performs a variety of matrix multiplies of same matrices on different processor grids
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
integer, | DIMENSION(:, :) | :: | group_sizes |
array of (sub) communicator sizes to test (2-D) |
||
type(dbcsr_type), | intent(in) | :: | matrix_a |
matrices to multiply matrices to multiply matrices to multiply |
||
type(dbcsr_type), | intent(in) | :: | matrix_b |
matrices to multiply matrices to multiply matrices to multiply |
||
type(dbcsr_type), | intent(in) | :: | matrix_c |
matrices to multiply matrices to multiply matrices to multiply |
||
character(len=1), | intent(in) | :: | transa | |||
character(len=1), | intent(in) | :: | transb | |||
type(dbcsr_scalar_type), | intent(in) | :: | alpha | |||
type(dbcsr_scalar_type), | intent(in) | :: | beta | |||
integer, | intent(in), | optional, | DIMENSION(6) | :: | limits | |
logical, | intent(in), | optional | :: | retain_sparsity | ||
integer, | intent(in) | :: | n_loops | |||
real(kind=dp), | intent(in) | :: | eps | |||
integer, | intent(in) | :: | io_unit |
which unit to write to, if not negative |
||
logical | :: | always_checksum |
SUBROUTINE test_multiplies_multiproc(group_sizes, &
matrix_a, matrix_b, matrix_c, &
transa, transb, alpha, beta, limits, retain_sparsity, &
n_loops, eps, &
io_unit, always_checksum)
!! Performs a variety of matrix multiplies of same matrices on different
!! processor grids
INTEGER, DIMENSION(:, :) :: group_sizes
!! array of (sub) communicator sizes to test (2-D)
TYPE(dbcsr_type), INTENT(in) :: matrix_a, matrix_b, matrix_c
!! matrices to multiply
!! matrices to multiply
!! matrices to multiply
CHARACTER, INTENT(in) :: transa, transb
TYPE(dbcsr_scalar_type), INTENT(in) :: alpha, beta
INTEGER, DIMENSION(6), INTENT(in), OPTIONAL :: limits
LOGICAL, INTENT(in), OPTIONAL :: retain_sparsity
INTEGER, INTENT(IN) :: n_loops
REAL(kind=dp), INTENT(in) :: eps
INTEGER, INTENT(IN) :: io_unit
!! which unit to write to, if not negative
LOGICAL :: always_checksum
CHARACTER(len=*), PARAMETER :: routineN = 'test_multiplies_multiproc'
INTEGER :: error_handle, &
loop_iter, mynode, numnodes, test
INTEGER(kind=int_8) :: flop, flop_sum
INTEGER, DIMENSION(2) :: npdims
INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: col_dist_a, col_dist_b, col_dist_c, &
row_dist_a, row_dist_b, row_dist_c
LOGICAL :: i_am_alive
REAL(kind=real_8) :: cs, cs_pos, flops_all, t1, t2
TYPE(dbcsr_distribution_obj) :: dist_a, dist_b, dist_c
TYPE(dbcsr_mp_obj) :: mp_env
TYPE(dbcsr_type) :: m_a, m_b, m_c, m_c_reserve
TYPE(mp_comm_type) :: cart_group, group
! ---------------------------------------------------------------------------
CALL timeset(routineN, error_handle)
IF (SIZE(group_sizes, 2) /= 2) &
DBCSR_ABORT("second dimension of group_sizes must be 2")
p_sizes: DO test = 1, SIZE(group_sizes, 1)
t2 = 0.0_real_8
flop_sum = 0
npdims(1:2) = group_sizes(test, 1:2)
numnodes = npdims(1)*npdims(2)
group = dbcsr_mp_group(dbcsr_distribution_mp( &
dbcsr_distribution(matrix_c)))
IF (numnodes .EQ. 0) THEN
CALL dbcsr_mp_make_env(mp_env, cart_group, group, nprocs=MAXVAL(npdims))
ELSE
CALL dbcsr_mp_make_env(mp_env, cart_group, group, pgrid_dims=npdims)
END IF
IF (numnodes < 0) &
DBCSR_ABORT("Cartesian sides must be greater or equal to 0")
i_am_alive = dbcsr_mp_active(mp_env)
alive: IF (i_am_alive) THEN
npdims(1) = dbcsr_mp_nprows(mp_env)
npdims(2) = dbcsr_mp_npcols(mp_env)
group = dbcsr_mp_group(mp_env)
CALL mp_environ(numnodes, mynode, group)
! Row & column distributions
CALL dbcsr_dist_bin(row_dist_a, &
dbcsr_nblkrows_total(matrix_a), npdims(1), &
dbcsr_row_block_sizes(matrix_a))
CALL dbcsr_dist_bin(col_dist_a, &
dbcsr_nblkcols_total(matrix_a), npdims(2), &
dbcsr_col_block_sizes(matrix_a))
CALL dbcsr_dist_bin(row_dist_b, &
dbcsr_nblkrows_total(matrix_b), npdims(1), &
dbcsr_row_block_sizes(matrix_b))
CALL dbcsr_dist_bin(col_dist_b, &
dbcsr_nblkcols_total(matrix_b), npdims(2), &
dbcsr_col_block_sizes(matrix_b))
CALL dbcsr_dist_bin(row_dist_c, &
dbcsr_nblkrows_total(matrix_c), npdims(1), &
dbcsr_row_block_sizes(matrix_c))
CALL dbcsr_dist_bin(col_dist_c, &
dbcsr_nblkcols_total(matrix_c), npdims(2), &
dbcsr_col_block_sizes(matrix_c))
CALL dbcsr_distribution_new(dist_a, &
mp_env, row_dist_a, col_dist_a, reuse_arrays=.TRUE.)
CALL dbcsr_distribution_new(dist_b, &
mp_env, row_dist_b, col_dist_b, reuse_arrays=.TRUE.)
CALL dbcsr_distribution_new(dist_c, &
mp_env, row_dist_c, col_dist_c, reuse_arrays=.TRUE.)
! Redistribute the matrices
! A
CALL dbcsr_create(m_a, "Test for "//TRIM(dbcsr_name(matrix_a)), &
dist_a, dbcsr_type_no_symmetry, &
row_blk_size_obj=matrix_a%row_blk_size, &
col_blk_size_obj=matrix_a%col_blk_size, &
data_type=dbcsr_get_data_type(matrix_a))
CALL dbcsr_distribution_release(dist_a)
CALL dbcsr_redistribute(matrix_a, m_a)
! B
CALL dbcsr_create(m_b, "Test for "//TRIM(dbcsr_name(matrix_b)), &
dist_b, dbcsr_type_no_symmetry, &
row_blk_size_obj=matrix_b%row_blk_size, &
col_blk_size_obj=matrix_b%col_blk_size, &
data_type=dbcsr_get_data_type(matrix_b))
CALL dbcsr_distribution_release(dist_b)
CALL dbcsr_redistribute(matrix_b, m_b)
! C
CALL dbcsr_create(m_c, "Test for "//TRIM(dbcsr_name(matrix_c)), &
dist_c, dbcsr_type_no_symmetry, &
row_blk_size_obj=matrix_c%row_blk_size, &
col_blk_size_obj=matrix_c%col_blk_size, &
data_type=dbcsr_get_data_type(matrix_c))
CALL dbcsr_distribution_release(dist_c)
CALL dbcsr_redistribute(matrix_c, m_c)
CALL dbcsr_copy(m_c_reserve, m_c)
! Perform multiply
loops: DO loop_iter = 1, n_loops
CALL dbcsr_release(m_c)
CALL dbcsr_copy(m_c, m_c_reserve)
CALL mp_sync(group)
t1 = -m_walltime()
IF (PRESENT(limits)) THEN
IF (eps .LE. -0.0_dp) THEN
CALL dbcsr_multiply(transa, transb, alpha, &
m_a, m_b, beta, m_c, &
first_row=limits(1), &
last_row=limits(2), &
first_column=limits(3), &
last_column=limits(4), &
first_k=limits(5), &
last_k=limits(6), &
retain_sparsity=retain_sparsity, flop=flop)
ELSE
CALL dbcsr_multiply(transa, transb, alpha, &
m_a, m_b, beta, m_c, &
first_row=limits(1), &
last_row=limits(2), &
first_column=limits(3), &
last_column=limits(4), &
first_k=limits(5), &
last_k=limits(6), &
retain_sparsity=retain_sparsity, flop=flop, &
filter_eps=eps)
END IF
ELSE
IF (eps .LE. -0.0_dp) THEN
CALL dbcsr_multiply(transa, transb, alpha, &
m_a, m_b, beta, m_c, &
retain_sparsity=retain_sparsity, flop=flop)
ELSE
CALL dbcsr_multiply(transa, transb, alpha, &
m_a, m_b, beta, m_c, &
retain_sparsity=retain_sparsity, flop=flop, &
filter_eps=eps)
END IF
END IF
t1 = t1 + m_walltime()
t2 = t2 + t1
flop_sum = flop_sum + flop
!
CALL mp_max(t1, group)
CALL mp_sum(flop, group)
t1 = MAX(t1, EPSILON(t1))
flops_all = REAL(flop, KIND=real_8)/t1/numnodes/(1024*1024)
IF (io_unit .GT. 0) THEN
WRITE (io_unit, '(A,I5,A,I5,A,F12.3,A,I9,A)') &
" loop ", loop_iter, " with ", numnodes, " MPI ranks: using ", t1, "s ", INT(flops_all), " Mflops/rank"
CALL m_flush(io_unit)
END IF
IF (loop_iter .EQ. n_loops .OR. always_checksum) THEN
cs = dbcsr_checksum(m_c)
cs_pos = dbcsr_checksum(m_c, pos=.TRUE.)
IF (io_unit > 0) THEN
WRITE (io_unit, *) "Final checksums", cs, cs_pos
END IF
END IF
END DO loops
! Release
CALL dbcsr_mp_release(mp_env)
CALL dbcsr_release(m_a)
CALL dbcsr_release(m_b)
CALL dbcsr_release(m_c)
CALL dbcsr_release(m_c_reserve)
END IF alive
CALL mp_comm_free(cart_group)
END DO p_sizes
CALL timestop(error_handle)
END SUBROUTINE test_multiplies_multiproc