acc_calculate_norms Subroutine

public subroutine acc_calculate_norms(matrix, norms, normsbuf, offsetsbuf, nelemsbuf, row_blk_sizes, col_blk_sizes)

calculate norms for a set of blocks in matrix whose row and col sizes are given

Arguments

Type IntentOptional Attributes Name
type(dbcsr_type), intent(in) :: matrix
real(kind=sp), intent(out), DIMENSION(:) :: norms
type(dbcsr_data_obj), intent(inout), TARGET :: normsbuf
type(dbcsr_data_obj), intent(inout), TARGET :: offsetsbuf
type(dbcsr_data_obj), intent(inout), TARGET :: nelemsbuf
integer, intent(in), DIMENSION(:), POINTER, CONTIGUOUS :: row_blk_sizes
integer, intent(in), DIMENSION(:), POINTER, CONTIGUOUS :: col_blk_sizes

Source Code

   SUBROUTINE acc_calculate_norms(matrix, norms, normsbuf, offsetsbuf, nelemsbuf, row_blk_sizes, col_blk_sizes)
      !! calculate norms for a set of blocks in matrix whose row and col sizes are given
      TYPE(dbcsr_type), INTENT(IN)                       :: matrix
      REAL(kind=sp), DIMENSION(:), INTENT(OUT)           :: norms
      TYPE(dbcsr_data_obj), INTENT(INOUT), TARGET        :: normsbuf
      TYPE(dbcsr_data_obj), INTENT(INOUT), TARGET        :: offsetsbuf
      TYPE(dbcsr_data_obj), INTENT(INOUT), TARGET        :: nelemsbuf
      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, INTENT(IN) :: row_blk_sizes, col_blk_sizes

      CHARACTER(len=*), PARAMETER :: routineN = 'acc_calculate_norms'

      INTEGER                                            :: nblks, blk_p, handle, i
      INTEGER                                            :: nblks_final, j
      INTEGER                                            :: data_type
      INTEGER                                            :: row, col
      INTEGER, DIMENSION(:), POINTER                     :: blk_index
      REAL, DIMENSION(:), POINTER, CONTIGUOUS            :: normsbuf_ptr
      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS         :: offsetsbuf_ptr
      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS         :: nelemsbuf_ptr
#if defined (__DBCSR_ACC)
      INTEGER                                            :: istat
#endif

      CALL timeset(routineN, handle)

      blk_index => matrix%coo_l
      nblks = matrix%nblks
      data_type = dbcsr_get_data_type(matrix)

      if (nblks > 0 .and. data_type .eq. dbcsr_type_real_8) then

         IF (normsbuf%d%data_type /= dbcsr_type_real_4) &
            DBCSR_ABORT("acc_calculate_norms: normsbuf has wrong datatype")
         IF (offsetsbuf%d%data_type /= dbcsr_type_int_4) &
            DBCSR_ABORT("acc_calculate_norms: offsetsbuf has wrong datatype")
         IF (nelemsbuf%d%data_type /= dbcsr_type_int_4) &
            DBCSR_ABORT("acc_calculate_norms: nelemsbuf has wrong datatype")

         NULLIFY (normsbuf_ptr)
         NULLIFY (offsetsbuf_ptr)
         NULLIFY (nelemsbuf_ptr)
         CALL dbcsr_data_ensure_size(normsbuf, data_size=nblks, nocopy=.TRUE.)
         CALL dbcsr_data_set_size_referenced(normsbuf, nblks)
         normsbuf_ptr => normsbuf%d%r_sp
         CALL dbcsr_data_ensure_size(offsetsbuf, data_size=nblks, nocopy=.TRUE.)
         CALL dbcsr_data_set_size_referenced(offsetsbuf, nblks)
         offsetsbuf_ptr => offsetsbuf%d%i4
         CALL dbcsr_data_ensure_size(nelemsbuf, data_size=nblks, nocopy=.TRUE.)
         CALL dbcsr_data_set_size_referenced(nelemsbuf, nblks)
         nelemsbuf_ptr => nelemsbuf%d%i4

         j = 1
         DO i = 1, nblks
            blk_p = blk_index(3*i)
            IF (blk_p == 0) CYCLE
            offsetsbuf_ptr(j) = blk_p - 1
            row = blk_index(3*i - 2)
            col = blk_index(3*i - 1)
            nelemsbuf_ptr(j) = row_blk_sizes(row)*col_blk_sizes(col)
            j = j + 1
         END DO
         nblks_final = j - 1

         ! copy offsets to GPU buffer, launch kernel, copy norms back to host
         ! offsetsbuf, nelemsbuf and normsbuf share the same stream, so no need
         ! to synchronize stream until norms are copied back to host
         CALL dbcsr_data_host2dev(offsetsbuf)
         CALL dbcsr_data_host2dev(nelemsbuf)
#if defined (__DBCSR_ACC)
         istat = acc_interface_calculate_norms(acc_devmem_cptr(matrix%data_area%d%acc_devmem), &
                                               INT(nblks_final, KIND=C_INT), &
                                               acc_devmem_cptr(offsetsbuf%d%acc_devmem), &
                                               acc_devmem_cptr(nelemsbuf%d%acc_devmem), &
                                               acc_devmem_cptr(normsbuf%d%acc_devmem), &
                                               acc_stream_cptr(normsbuf%d%memory_type%acc_stream))
         IF (istat == -1) &
            DBCSR_ABORT("acc_calculate_norms: warp size obtained is unexpected")
#endif
         CALL dbcsr_data_dev2host(normsbuf)
         CALL acc_stream_synchronize(normsbuf%d%memory_type%acc_stream)

         j = 1
         DO i = 1, nblks
            blk_p = blk_index(3*i)
            IF (blk_p == 0) CYCLE
            norms(i) = normsbuf_ptr(j)
            j = j + 1
         END DO
      else
         ! call CPU function to calculate norms
         CALL calculate_norms(matrix, norms, row_blk_sizes, col_blk_sizes)
      end if
      CALL timestop(handle)
   END SUBROUTINE acc_calculate_norms