Launch an accelerated kernel for processing a stack.
| Type | Intent | Optional | Attributes | | Name | |
|---|---|---|---|---|---|---|
| integer, | intent(in), | | DIMENSION(:, :), TARGET | :: | param_stack_host | |
| type(acc_devmem_type), | intent(in) | | | :: | param_stack_dev | |
| integer, | intent(in) | | | :: | stack_size | |
| integer, | intent(in) | | | :: | data_type | |
| type(acc_devmem_type), | intent(in) | | | :: | a_data | |
| type(acc_devmem_type), | intent(in) | | | :: | b_data | |
| type(acc_devmem_type), | intent(inout) | | | :: | c_data | |
| integer, | intent(in) | | | :: | m_max | |
| integer, | intent(in) | | | :: | n_max | |
| integer, | intent(in) | | | :: | k_max | |
| logical, | intent(in) | | | :: | def_mnk | |
| type(acc_stream_type), | intent(in) | | | :: | stack_stream | |
| type(acc_stream_type), | intent(in) | | | :: | c_stream | |
| logical, | intent(inout) | | | :: | success | |
SUBROUTINE dbcsr_acc_do_mm_stack(param_stack_host, param_stack_dev, stack_size, data_type, &
                                 a_data, b_data, c_data, m_max, n_max, k_max, def_mnk, &
                                 stack_stream, c_stream, success)
   !! Launch an accelerated kernel for processing a stack.
   !!
   !! When built with __DBCSR_ACC, all arguments are converted to their
   !! C-interoperable form and forwarded to libsmm_acc_process_cu; success
   !! is then set from that function's return code. When built without
   !! __DBCSR_ACC, the routine aborts.

   INTEGER, DIMENSION(:, :), TARGET, INTENT(IN) :: param_stack_host
      !! host array whose address is handed to the backend
   TYPE(acc_devmem_type), INTENT(IN)            :: param_stack_dev
      !! device memory handle, passed on via acc_devmem_cptr
   INTEGER, INTENT(IN)                          :: stack_size
      !! forwarded as C_INT (presumably the number of stack entries - TODO confirm)
   INTEGER, INTENT(IN)                          :: data_type
      !! forwarded as C_INT; its encoding is defined by the backend
   TYPE(acc_devmem_type), INTENT(IN)            :: a_data
   TYPE(acc_devmem_type), INTENT(IN)            :: b_data
   TYPE(acc_devmem_type), INTENT(INOUT)         :: c_data
   INTEGER, INTENT(IN)                          :: m_max
   INTEGER, INTENT(IN)                          :: n_max
   INTEGER, INTENT(IN)                          :: k_max
   LOGICAL, INTENT(IN)                          :: def_mnk
      !! passed to the backend as the integer flag 1 (.TRUE.) or 0 (.FALSE.)
   TYPE(acc_stream_type), INTENT(IN)            :: stack_stream
   TYPE(acc_stream_type), INTENT(IN)            :: c_stream
   LOGICAL, INTENT(INOUT)                       :: success
      !! .TRUE. exactly when the backend call returned 0

#if ! defined (__DBCSR_ACC)
   ! No accelerator support in this build: reference every dummy argument
   ! through MARK_USED, then abort.
   MARK_USED(param_stack_host)
   MARK_USED(param_stack_dev)
   MARK_USED(stack_size)
   MARK_USED(data_type)
   MARK_USED(a_data)
   MARK_USED(b_data)
   MARK_USED(c_data)
   MARK_USED(m_max)
   MARK_USED(n_max)
   MARK_USED(k_max)
   MARK_USED(def_mnk)
   MARK_USED(stack_stream)
   MARK_USED(c_stream)
   MARK_USED(success)
   DBCSR_ABORT("__DBCSR_ACC not compiled in.")
#else
   CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_acc_do_mm_stack'

   INTEGER                           :: handle, ierr
   INTEGER(KIND=C_INT)               :: mnk_flag
   INTEGER, DIMENSION(:, :), POINTER :: stack_ptr

   ! C_LOC is applied to a pointer associated with the dummy array.
   stack_ptr => param_stack_host(:, :)

   IF (careful_mod) CALL timeset(routineN, handle)

   ! Translate the logical switch into the integer flag the C interface takes.
   mnk_flag = MERGE(1_C_INT, 0_C_INT, def_mnk)

   ! Call batched matrix-matrix multiplication in libsmm_acc
   ierr = libsmm_acc_process_cu( &
          C_LOC(stack_ptr), acc_devmem_cptr(param_stack_dev), &
          INT(stack_size, KIND=C_INT), INT(data_type, KIND=C_INT), &
          acc_devmem_cptr(a_data), acc_devmem_cptr(b_data), acc_devmem_cptr(c_data), &
          INT(m_max, KIND=C_INT), INT(n_max, KIND=C_INT), INT(k_max, KIND=C_INT), &
          INT(max_kernel_dim, KIND=C_INT), mnk_flag, &
          acc_stream_cptr(stack_stream), acc_stream_cptr(c_stream))

   ! Individual return codes are not turned into aborts; the checks below
   ! are kept disabled and any non-zero code is reported through success.
   ! IF (istat == -10) DBCSR_ABORT("Data type not supported with GPU backend.")
   ! IF (istat == -20) DBCSR_ABORT("GPU kernel not JIT-ed.")
   success = ierr == 0 ! false if no suitable kernel was found

   IF (careful_mod) CALL timestop(handle)
#endif
END SUBROUTINE dbcsr_acc_do_mm_stack