Launch an accelerated transpose kernel
Type | Intent | Optional | Attributes | | Name | |
---|---|---|---|---|---|---|
type(acc_devmem_type), | intent(in) | | | :: | trs_stack | |
integer, | intent(in) | | | :: | offset | |
integer, | intent(in) | | | :: | nblks | |
integer, | intent(in) | | | :: | data_type | |
type(acc_devmem_type), | intent(in) | | | :: | buffer | |
integer, | intent(in) | | | :: | m | |
integer, | intent(in) | | | :: | n | |
type(acc_stream_type), | intent(in) | | | :: | stream | |
SUBROUTINE dbcsr_acc_transpose(trs_stack, offset, nblks, data_type, buffer, m, n, stream)
   !! Launch an accelerated transpose kernel.
   !!
   !! The request is handed to libsmm_acc_transpose_cu only when both m and n
   !! are no larger than max_kernel_dim; otherwise no kernel is launched and
   !! the routine returns without raising an error. A non-zero status from
   !! the kernel call aborts, as does any call made in a build without
   !! __DBCSR_ACC.

   TYPE(acc_devmem_type), INTENT(IN) :: trs_stack
      !! device memory whose C pointer is passed as the kernel's first argument
      !! (presumably the transpose stack, going by the name -- confirm)
   INTEGER, INTENT(IN) :: offset
      !! forwarded to the kernel as a C int
   INTEGER, INTENT(IN) :: nblks
      !! forwarded to the kernel as a C int
   INTEGER, INTENT(IN) :: data_type
      !! forwarded to the kernel as a C int
   TYPE(acc_devmem_type), INTENT(IN) :: buffer
      !! device memory whose C pointer is passed to the kernel
   INTEGER, INTENT(IN) :: m
      !! first dimension checked against max_kernel_dim
   INTEGER, INTENT(IN) :: n
      !! second dimension checked against max_kernel_dim
   TYPE(acc_stream_type), INTENT(IN) :: stream
      !! stream whose C pointer is passed to the kernel

#if defined (__DBCSR_ACC)
   CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_acc_transpose'

   INTEGER :: handle, status
   LOGICAL :: launch_kernel

   ! Stays zero when the launch is skipped, so the check below passes.
   status = 0
   launch_kernel = (MAX(m, n) <= max_kernel_dim)

   IF (careful_mod) CALL timeset(routineN, handle)

   ! Call batched in-place transpose in libsmm_acc
   IF (launch_kernel) THEN
      status = libsmm_acc_transpose_cu( &
               acc_devmem_cptr(trs_stack), &
               INT(offset, KIND=C_INT), &
               INT(nblks, KIND=C_INT), &
               acc_devmem_cptr(buffer), &
               INT(data_type, KIND=C_INT), &
               INT(m, KIND=C_INT), &
               INT(n, KIND=C_INT), &
               INT(max_kernel_dim, KIND=C_INT), &
               acc_stream_cptr(stream))
   END IF

   IF (status /= 0) DBCSR_ABORT("something went wrong.")

   IF (careful_mod) CALL timestop(handle)
#else
   ! Built without accelerator support: mark the dummies as used and abort.
   MARK_USED(trs_stack)
   MARK_USED(offset)
   MARK_USED(nblks)
   MARK_USED(data_type)
   MARK_USED(buffer)
   MARK_USED(m)
   MARK_USED(n)
   MARK_USED(stream)
   DBCSR_ABORT("__DBCSR_ACC not compiled in.")
#endif
END SUBROUTINE dbcsr_acc_transpose