block_partial_copy_2d2d_s Subroutine

private subroutine block_partial_copy_2d2d_s(dst, dst_tr, src, src_tr, dst_r_lb, dst_c_lb, src_r_lb, src_c_lb, nrow, ncol)

Copies a block subset

Note

see block_partial_copy_a

Arguments

Type IntentOptional Attributes Name
real(kind=real_4), intent(inout), DIMENSION(:, :) :: dst
logical, intent(in) :: dst_tr
real(kind=real_4), intent(in), DIMENSION(:, :) :: src
logical, intent(in) :: src_tr
integer, intent(in) :: dst_r_lb
integer, intent(in) :: dst_c_lb
integer, intent(in) :: src_r_lb
integer, intent(in) :: src_c_lb
integer, intent(in) :: nrow
integer, intent(in) :: ncol

Source Code

      PURE_BLOCKOPS SUBROUTINE block_partial_copy_2d2d_s (dst, dst_tr, &
                                                                      src, src_tr, &
                                                                      dst_r_lb, dst_c_lb, src_r_lb, src_c_lb, nrow, ncol)
     !! Copies a block subset
     !! @note see block_partial_copy_a

#if defined(__LIBXSMM_BLOCKOPS)
         USE libxsmm, ONLY: libxsmm_matcopy, libxsmm_otrans, libxsmm_ptr0
#else
         INTEGER                                  :: col, row
#endif
         REAL(kind=real_4), DIMENSION(:, :), &
            INTENT(INOUT)                         :: dst
         LOGICAL, INTENT(IN)                      :: dst_tr
         REAL(kind=real_4), DIMENSION(:, :), &
            INTENT(IN)                            :: src
         LOGICAL, INTENT(IN)                      :: src_tr
         INTEGER, INTENT(IN)                      :: dst_r_lb, dst_c_lb, src_r_lb, &
                                                     src_c_lb, nrow, ncol

!    ---------------------------------------------------------------------------
!    Factors out the 4 combinations to remove branches from the inner loop.
!    rs is the logical row size so it always remains the leading dimension.
         IF (.NOT. dst_tr .AND. .NOT. src_tr) THEN
#if defined(__LIBXSMM_BLOCKOPS)
            IF ((0 .LT. ncol) .AND. (0 .LT. nrow)) THEN
               CALL libxsmm_matcopy(libxsmm_ptr0(dst(dst_r_lb, dst_c_lb)), &
                                    libxsmm_ptr0(src(src_r_lb, src_c_lb)), &
                                    4, nrow, ncol, &
                                    SIZE(src, 1), SIZE(dst, 1))
            END IF
#else
            DO col = 0, ncol - 1
               DO row = 0, nrow - 1
                  dst(dst_r_lb + row, dst_c_lb + col) &
                     = src(src_r_lb + row, src_c_lb + col)
               END DO
            END DO
#endif
         ELSEIF (dst_tr .AND. .NOT. src_tr) THEN
#if defined(__LIBXSMM_BLOCKOPS)
            IF ((0 .LT. ncol) .AND. (0 .LT. nrow)) THEN
               CALL libxsmm_otrans(libxsmm_ptr0(dst(dst_c_lb, dst_r_lb)), &
                                   libxsmm_ptr0(src(src_r_lb, src_c_lb)), &
                                   4, nrow, ncol, &
                                   SIZE(src, 1), SIZE(dst, 2))
            END IF
#else
            DO col = 0, ncol - 1
               DO row = 0, nrow - 1
                  dst(dst_c_lb + col, dst_r_lb + row) &
                     = src(src_r_lb + row, src_c_lb + col)
               END DO
            END DO
#endif
         ELSEIF (.NOT. dst_tr .AND. src_tr) THEN
#if defined(__LIBXSMM_BLOCKOPS)
            IF ((0 .LT. ncol) .AND. (0 .LT. nrow)) THEN
               CALL libxsmm_otrans(libxsmm_ptr0(dst(dst_r_lb, dst_c_lb)), &
                                   libxsmm_ptr0(src(src_c_lb, src_r_lb)), &
                                   4, nrow, ncol, &
                                   SIZE(src, 2), SIZE(dst, 1))
            END IF
#else
            DO col = 0, ncol - 1
               DO row = 0, nrow - 1
                  dst(dst_r_lb + row, dst_c_lb + col) &
                     = src(src_c_lb + col, src_r_lb + row)
               END DO
            END DO
#endif
         ELSE
            DBCSR_ASSERT(dst_tr .AND. src_tr)
#if defined(__LIBXSMM_BLOCKOPS)
            IF ((0 .LT. ncol) .AND. (0 .LT. nrow)) THEN
               CALL libxsmm_matcopy(libxsmm_ptr0(dst(dst_c_lb, dst_r_lb)), &
                                    libxsmm_ptr0(src(src_c_lb, src_r_lb)), &
                                    4, nrow, ncol, &
                                    SIZE(src, 2), SIZE(dst, 2))
            END IF
#else
            DO col = 0, ncol - 1
               DO row = 0, nrow - 1
                  dst(dst_c_lb + col, dst_r_lb + row) &
                     = src(src_c_lb + col, src_r_lb + row)
               END DO
            END DO
#endif
         END IF
      END SUBROUTINE block_partial_copy_2d2d_s