Processes a given stack. From here on there is no boundary checking due to assumed-SIZE-arguments. This is useful to check stack parameters, BUT it works only for kind=dp
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(dbcsr_mm_sched_type), | intent(inout) | :: | this | |||
type(dbcsr_type), | intent(in) | :: | left | |||
type(dbcsr_type), | intent(in) | :: | right | |||
integer, | DIMENSION(:, :), POINTER | :: | stack_data | |||
integer, | POINTER | :: | stack_fillcount | |||
type(stack_descriptor_type), | intent(in) | :: | stack_descr |
SUBROUTINE dbcsr_mm_sched_process(this, left, right, stack_data, & stack_fillcount, stack_descr) !! Processes a given stack. TYPE(dbcsr_mm_sched_type), INTENT(INOUT) :: this TYPE(dbcsr_type), INTENT(IN) :: left, right INTEGER, DIMENSION(:, :), POINTER :: stack_data INTEGER, POINTER :: stack_fillcount TYPE(stack_descriptor_type), INTENT(IN) :: stack_descr INTEGER :: ithread, sp, stacked_datasize INTEGER(kind=int_8) :: flop_per_entry, total_flop LOGICAL :: success, generated_acc_untuned, used_smm TYPE(stats_type), POINTER :: mystats IF (stack_fillcount <= 0) & DBCSR_ABORT("dbcsr_mm_sched_process: got empty stack") ithread = 0 !$ ithread = OMP_GET_THREAD_NUM() mystats => stats_per_thread(ithread) CALL ensure_product_wm_cleared(this) stacked_datasize = this%product_wm%datasize CALL dbcsr_data_ensure_size(this%product_wm%data_area, stacked_datasize, & factor=default_resize_factor, zero_pad=.TRUE.) !!From here on there is no boundary checking due to assumed-SIZE-arguments. !!This is useful to check stack parameters, BUT it works only for kind=dp IF (.FALSE.) THEN DO sp = 1, stack_fillcount IF (stack_data(p_a_first, sp) > SIZE(left%data_area%d%r_dp)) & DBCSR_ABORT("left data out of range") IF (stack_data(p_b_first, sp) > SIZE(right%data_area%d%r_dp)) & DBCSR_ABORT("right data out of range") IF (stack_data(p_c_first, sp) > SIZE(this%product_wm%data_area%d%r_dp)) THEN WRITE (*, *) "blub: ", stack_data(p_c_first, sp), SIZE(this%product_wm%data_area%d%r_dp), & dbcsr_data_get_size(this%product_wm%data_area), stacked_datasize DBCSR_ABORT("product data out of range") END IF END DO END IF IF (.FALSE.) THEN ! Check if homogeneous stacks are indeed homogeneous IF (stack_descr%defined_mnk) THEN DO sp = 1, stack_fillcount IF (stack_data(p_m, sp) /= stack_descr%m) & DBCSR_ABORT("homogeneous stacks check failed") IF (stack_data(p_n, sp) /= stack_descr%n) & DBCSR_ABORT("homogeneous stacks check failed") IF (stack_data(p_k, sp) /= stack_descr%k) & DBCSR_ABORT("homogeneous stacks check failed") END DO END IF END IF ! Submitting the stack for processing ------------------------------------- flop_per_entry = INT(2, KIND=int_8)*stack_descr%max_m*stack_descr%max_n*stack_descr%max_k total_flop = stack_fillcount*flop_per_entry IF (use_acc() .AND. & flop_per_entry > dbcsr_cfg%accdrv_min_flop_process%val .AND. & (.NOT. this%avoid_accdrv) .AND. & (stack_descr%defined_mnk .OR. dbcsr_cfg%accdrv_do_inhomogenous%val)) THEN CALL dbcsr_mm_accdrv_process( & this%accdrv, & left, right, & params=stack_data, & stack_size=stack_fillcount, & stack_descr=stack_descr, & success=success, & generated_acc_untuned=generated_acc_untuned) IF (success) THEN ! update statistics mystats%acc_num_stacks = mystats%acc_num_stacks + 1 mystats%acc_flop = mystats%acc_flop + total_flop CALL stats_add(mystats, & m=stack_descr%m, n=stack_descr%n, k=stack_descr%k, & stacksize_acc=INT(stack_fillcount, kind=int_8), & generated_acc_untuned=generated_acc_untuned) RETURN ELSE this%avoid_accdrv = dbcsr_cfg%accdrv_avoid_after_busy%val END IF END IF !WRITE (*,*) "dbcsr_mm_sched_process: running hostdrv_process, stack_fillcount:", stack_fillcount CALL dbcsr_mm_hostdrv_process( & this%hostdrv, & left, right, & params=stack_data, & stack_size=stack_fillcount, & stack_descr=stack_descr, & success=success, & used_smm=used_smm) IF (.NOT. success) DBCSR_ABORT("dbcsr_mm_sched_process_stack failed") ! update statistics IF (used_smm) THEN mystats%smm_num_stacks = mystats%smm_num_stacks + 1 mystats%smm_flop = mystats%smm_flop + total_flop CALL stats_add(mystats, & m=stack_descr%m, n=stack_descr%n, k=stack_descr%k, & stacksize_smm=INT(stack_fillcount, kind=int_8)) ELSE mystats%cpu_num_stacks = mystats%cpu_num_stacks + 1 mystats%cpu_flop = mystats%cpu_flop + total_flop CALL stats_add(mystats, & m=stack_descr%m, n=stack_descr%n, k=stack_descr%k, & stacksize_cpu=INT(stack_fillcount, kind=int_8)) END IF END SUBROUTINE dbcsr_mm_sched_process