calculate norms for a set of blocks in matrix whose row and col sizes are given
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(dbcsr_type), | intent(in) | :: | matrix | |||
real(kind=sp), | intent(out), | DIMENSION(:) | :: | norms | ||
type(dbcsr_data_obj), | intent(inout), | TARGET | :: | normsbuf | ||
type(dbcsr_data_obj), | intent(inout), | TARGET | :: | offsetsbuf | ||
type(dbcsr_data_obj), | intent(inout), | TARGET | :: | nelemsbuf | ||
integer, | intent(in), | DIMENSION(:), POINTER, CONTIGUOUS | :: | row_blk_sizes | ||
integer, | intent(in), | DIMENSION(:), POINTER, CONTIGUOUS | :: | col_blk_sizes |
SUBROUTINE acc_calculate_norms(matrix, norms, normsbuf, offsetsbuf, nelemsbuf, row_blk_sizes, col_blk_sizes) !! calculate norms for a set of blocks in matrix whose row and col sizes are given TYPE(dbcsr_type), INTENT(IN) :: matrix REAL(kind=sp), DIMENSION(:), INTENT(OUT) :: norms TYPE(dbcsr_data_obj), INTENT(INOUT), TARGET :: normsbuf TYPE(dbcsr_data_obj), INTENT(INOUT), TARGET :: offsetsbuf TYPE(dbcsr_data_obj), INTENT(INOUT), TARGET :: nelemsbuf INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, INTENT(IN) :: row_blk_sizes, col_blk_sizes CHARACTER(len=*), PARAMETER :: routineN = 'acc_calculate_norms' INTEGER :: nblks, blk_p, handle, i INTEGER :: nblks_final, j INTEGER :: data_type INTEGER :: row, col INTEGER, DIMENSION(:), POINTER :: blk_index REAL, DIMENSION(:), POINTER, CONTIGUOUS :: normsbuf_ptr INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: offsetsbuf_ptr INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: nelemsbuf_ptr #if defined (__DBCSR_ACC) INTEGER :: istat #endif CALL timeset(routineN, handle) blk_index => matrix%coo_l nblks = matrix%nblks data_type = dbcsr_get_data_type(matrix) if (nblks > 0 .and. data_type .eq. dbcsr_type_real_8) then IF (normsbuf%d%data_type /= dbcsr_type_real_4) & DBCSR_ABORT("acc_calculate_norms: normsbuf has wrong datatype") IF (offsetsbuf%d%data_type /= dbcsr_type_int_4) & DBCSR_ABORT("acc_calculate_norms: offsetsbuf has wrong datatype") IF (nelemsbuf%d%data_type /= dbcsr_type_int_4) & DBCSR_ABORT("acc_calculate_norms: nelemsbuf has wrong datatype") NULLIFY (normsbuf_ptr) NULLIFY (offsetsbuf_ptr) NULLIFY (nelemsbuf_ptr) CALL dbcsr_data_ensure_size(normsbuf, data_size=nblks, nocopy=.TRUE.) CALL dbcsr_data_set_size_referenced(normsbuf, nblks) normsbuf_ptr => normsbuf%d%r_sp CALL dbcsr_data_ensure_size(offsetsbuf, data_size=nblks, nocopy=.TRUE.) CALL dbcsr_data_set_size_referenced(offsetsbuf, nblks) offsetsbuf_ptr => offsetsbuf%d%i4 CALL dbcsr_data_ensure_size(nelemsbuf, data_size=nblks, nocopy=.TRUE.) CALL dbcsr_data_set_size_referenced(nelemsbuf, nblks) nelemsbuf_ptr => nelemsbuf%d%i4 j = 1 DO i = 1, nblks blk_p = blk_index(3*i) IF (blk_p == 0) CYCLE offsetsbuf_ptr(j) = blk_p - 1 row = blk_index(3*i - 2) col = blk_index(3*i - 1) nelemsbuf_ptr(j) = row_blk_sizes(row)*col_blk_sizes(col) j = j + 1 END DO nblks_final = j - 1 ! copy offsets to GPU buffer, launch kernel, copy norms back to host ! offsetsbuf, nelemsbuf and normsbuf share the same stream, so no need ! to synchronize stream until norms are copied back to host CALL dbcsr_data_host2dev(offsetsbuf) CALL dbcsr_data_host2dev(nelemsbuf) #if defined (__DBCSR_ACC) istat = acc_interface_calculate_norms(acc_devmem_cptr(matrix%data_area%d%acc_devmem), & INT(nblks_final, KIND=C_INT), & acc_devmem_cptr(offsetsbuf%d%acc_devmem), & acc_devmem_cptr(nelemsbuf%d%acc_devmem), & acc_devmem_cptr(normsbuf%d%acc_devmem), & acc_stream_cptr(normsbuf%d%memory_type%acc_stream)) IF (istat == -1) & DBCSR_ABORT("acc_calculate_norms: warp size obtained is unexpected") #endif CALL dbcsr_data_dev2host(normsbuf) CALL acc_stream_synchronize(normsbuf%d%memory_type%acc_stream) j = 1 DO i = 1, nblks blk_p = blk_index(3*i) IF (blk_p == 0) CYCLE norms(i) = normsbuf_ptr(j) j = j + 1 END DO else ! call CPU function to calculate norms CALL calculate_norms(matrix, norms, row_blk_sizes, col_blk_sizes) end if CALL timestop(handle) END SUBROUTINE acc_calculate_norms