Launch an accelerated transpose kernel
| Type | Intent | Optional | Attributes | | Name | |
|---|---|---|---|---|---|---|
| type(acc_devmem_type) | intent(in) | | | :: | trs_stack | |
| integer | intent(in) | | | :: | offset | |
| integer | intent(in) | | | :: | nblks | |
| integer | intent(in) | | | :: | data_type | |
| type(acc_devmem_type) | intent(in) | | | :: | buffer | |
| integer | intent(in) | | | :: | m | |
| integer | intent(in) | | | :: | n | |
| type(acc_stream_type) | intent(in) | | | :: | stream | |
SUBROUTINE dbcsr_acc_transpose(trs_stack, offset, nblks, data_type, buffer, m, n, stream)
   !! Launch an accelerated transpose kernel.
   !!
   !! The request is forwarded to libsmm_acc only when both m and n are at
   !! most max_kernel_dim; otherwise the routine returns without launching
   !! anything. A non-zero status returned by libsmm_acc aborts the run.
   !! When accelerator support is not compiled in, the routine always aborts.

   TYPE(acc_devmem_type), INTENT(IN) :: trs_stack
      !! device memory whose C pointer is passed as the first kernel argument
      !! (presumably the transpose stack; confirm against libsmm_acc)
   INTEGER, INTENT(IN) :: offset
      !! forwarded unchanged to libsmm_acc as a C int
   INTEGER, INTENT(IN) :: nblks
      !! forwarded unchanged to libsmm_acc as a C int
   INTEGER, INTENT(IN) :: data_type
      !! forwarded unchanged to libsmm_acc as a C int
   TYPE(acc_devmem_type), INTENT(IN) :: buffer
      !! device memory whose C pointer is passed to the kernel
   INTEGER, INTENT(IN) :: m, n
      !! dimensions checked against max_kernel_dim before launching
   TYPE(acc_stream_type), INTENT(IN) :: stream
      !! stream whose C pointer is passed to the kernel

#if ! defined (__DBCSR_ACC)
   ! Accelerator support is not compiled in: mark every dummy, then abort.
   MARK_USED(trs_stack)
   MARK_USED(offset)
   MARK_USED(nblks)
   MARK_USED(data_type)
   MARK_USED(buffer)
   MARK_USED(m)
   MARK_USED(n)
   MARK_USED(stream)
   DBCSR_ABORT("__DBCSR_ACC not compiled in.")
#else
   CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_acc_transpose'

   INTEGER :: handle, ierr

   ! Timing is only recorded when careful_mod is set.
   IF (careful_mod) CALL timeset(routineN, handle)

   ! Call batched in-place transpose in libsmm_acc.
   ! Nothing is launched when either dimension exceeds max_kernel_dim.
   IF (MAX(m, n) <= max_kernel_dim) THEN
      ierr = libsmm_acc_transpose_cu(acc_devmem_cptr(trs_stack), &
                                     INT(offset, KIND=C_INT), &
                                     INT(nblks, KIND=C_INT), &
                                     acc_devmem_cptr(buffer), &
                                     INT(data_type, KIND=C_INT), &
                                     INT(m, KIND=C_INT), &
                                     INT(n, KIND=C_INT), &
                                     INT(max_kernel_dim, KIND=C_INT), &
                                     acc_stream_cptr(stream))
      ! The status can only be non-zero after an actual launch.
      IF (ierr /= 0) DBCSR_ABORT("something went wrong.")
   END IF

   IF (careful_mod) CALL timestop(handle)
#endif
END SUBROUTINE dbcsr_acc_transpose