Start timer
| Type | Intent | Optional | Attributes | | Name | |
|---|---|---|---|---|---|---|
| character(len=*), | intent(in) | | | :: | routineN | |
| integer, | intent(out) | | | :: | handle | |
SUBROUTINE timeset_handler(routineN, handle)
   !! Start timer
   !!
   !! Records the entry into routine `routineN` on the timer environment at the
   !! top of `timers_stack`: the per-routine call statistics are updated, a new
   !! entry is pushed onto the callstack and, when tracing is enabled for this
   !! routine, a trace line including host and GPU memory usage is written.
   !!
   !! All work is done inside an `!$OMP MASTER` region, so `handle` is only
   !! assigned by the thread that executes that region.
   CHARACTER(LEN=*), INTENT(IN) :: routineN
      !! name of the routine being entered; its trimmed length must not
      !! exceed `default_string_length`
   INTEGER, INTENT(OUT) :: handle
      !! id of the routine as returned by `routine_name2id`

   CHARACTER(LEN=400) :: line, mystring
   CHARACTER(LEN=60) :: sformat
   CHARACTER(LEN=default_string_length) :: routine_name_dsl
   INTEGER :: routine_id, stack_size
#if defined( __HIP_PROFILING )
   INTEGER :: ret
#endif
   INTEGER(KIND=int_8) :: cpumem, gpumem_free, gpumem_total
   TYPE(callstack_entry_type) :: cs_entry
   TYPE(routine_stat_type), POINTER :: r_stat
   TYPE(timer_env_type), POINTER :: timer_env

   !$OMP MASTER
   ! Reject over-long names before they are truncated and looked up, so that
   ! a truncated name is never passed to routine_name2id.
   IF (LEN_TRIM(routineN) > default_string_length) THEN
      DBCSR_ABORT('timings_timeset: routineN too long: "'//TRIM(routineN)//'"')
   END IF

   ! Default value, using a negative value when timing is not taken
   cs_entry%walltime_start = -HUGE(1.0_dp)
   cs_entry%energy_start = -HUGE(1.0_dp)

   routine_name_dsl = routineN ! converts to default_string_length
   routine_id = routine_name2id(routine_name_dsl)

   ! Take timings only when the timings level requests it
   IF (global_timings_level /= 0) THEN
      cs_entry%walltime_start = m_walltime()
      cs_entry%energy_start = m_energy()
   END IF

   timer_env => list_peek(timers_stack)

   ! Update the statistics of this routine; stack_size is the callstack depth
   ! before the new entry is pushed, hence the "+ 1" for the accumulated depth.
   r_stat => list_get(timer_env%routine_stats, routine_id)
   stack_size = list_size(timer_env%callstack)
   r_stat%total_calls = r_stat%total_calls + 1
   r_stat%active_calls = r_stat%active_calls + 1
   r_stat%stackdepth_accu = r_stat%stackdepth_accu + stack_size + 1

   ! Add the routine to the callstack
   cs_entry%routine_id = routine_id
   CALL list_push(timer_env%callstack, cs_entry)

   ! In trace mode echo the routine name, limited to the first calls
   ! (total_calls < trace_max) of each routine
   IF ((timer_env%trace_all .OR. r_stat%trace) .AND. &
       (r_stat%total_calls < timer_env%trace_max)) THEN
      ! Build the format at run time: the number of blanks grows with the
      ! callstack depth so that nested calls appear indented.
      WRITE (sformat, *) "(A,A,", MAX(1, 3*stack_size - 4), "X,I4,1X,I6,1X,A,A)"
      WRITE (mystring, sformat) timer_env%trace_str, ">>", stack_size + 1, &
         r_stat%total_calls, TRIM(r_stat%routineN), "       start"
      CALL acc_devmem_info(gpumem_free, gpumem_total)
      CALL m_memory(cpumem)
      ! Host memory is rounded up to whole MiB; GPU memory is reported as
      ! total minus free, in MiB.
      WRITE (line, '(A,A,I0,A,A,I0,A)') TRIM(mystring), &
         " Hostmem: ", (cpumem + 1024**2 - 1)/1024**2, " MiB", &
         " GPUmem: ", (gpumem_total - gpumem_free)/1024**2, " MiB"
      WRITE (timer_env%trace_unit, *) TRIM(line)
      CALL m_flush(timer_env%trace_unit)
   END IF

   handle = routine_id

   ! Optionally open a profiler range named after the routine
#if defined( __CUDA_PROFILING )
   CALL cuda_nvtx_range_push(routineN)
#endif
#if defined( __HIP_PROFILING )
   ret = roctxRangePushA(routineN//C_NULL_CHAR)
#endif
   !$OMP END MASTER
END SUBROUTINE timeset_handler