Start timer
Type | Intent | Optional | Attributes | | Name | |
---|---|---|---|---|---|---|
character(len=*), | intent(in) | | | :: | routineN | |
integer, | intent(out) | | | :: | handle | |
SUBROUTINE timeset_handler(routineN, handle)
   !! Start timer.
   !!
   !! Inside an OpenMP MASTER region this routine resolves routineN to a routine id,
   !! records the start wall time and energy when global_timings_level is non-zero,
   !! updates the call statistics of that routine in the timer environment on top of
   !! timers_stack, pushes a new entry onto its callstack and, when tracing is enabled
   !! for the routine, writes one trace line including host and GPU memory usage.
   CHARACTER(LEN=*), INTENT(IN)                       :: routineN
      !! name of the routine being entered; the run is aborted if its trimmed
      !! length exceeds default_string_length
   INTEGER, INTENT(OUT)                               :: handle
      !! routine id as returned by routine_name2id.
      !! NOTE(review): assigned only inside the MASTER region - confirm that callers
      !! running on other threads do not rely on its value.

   CHARACTER(LEN=400)                                 :: line, mystring
   CHARACTER(LEN=60)                                  :: sformat
   CHARACTER(LEN=default_string_length)               :: routine_name_dsl
   INTEGER                                            :: routine_id, stack_size
#if defined( __HIP_PROFILING )
   INTEGER                                            :: ret
#endif
   INTEGER(KIND=int_8)                                :: cpumem, gpumem_free, gpumem_total
   TYPE(callstack_entry_type)                         :: cs_entry
   TYPE(routine_stat_type), POINTER                   :: r_stat
   TYPE(timer_env_type), POINTER                      :: timer_env

!$OMP MASTER
   ! Validate the name first: the assignment to routine_name_dsl below silently
   ! truncates, so checking afterwards would look up a truncated name before aborting.
   IF (LEN_TRIM(routineN) > default_string_length) THEN
      DBCSR_ABORT('timings_timeset: routineN too long: "'//TRIM(routineN)//'"')
   END IF

   ! Default value, using a negative value when timing is not taken
   cs_entry%walltime_start = -HUGE(1.0_dp)
   cs_entry%energy_start = -HUGE(1.0_dp)

   routine_name_dsl = routineN ! converts to default_string_length
   routine_id = routine_name2id(routine_name_dsl)

   ! Take timings only when the timings level asks for it
   IF (global_timings_level /= 0) THEN
      cs_entry%walltime_start = m_walltime()
      cs_entry%energy_start = m_energy()
   END IF
   timer_env => list_peek(timers_stack)

   ! update routine r_stats
   r_stat => list_get(timer_env%routine_stats, routine_id)
   stack_size = list_size(timer_env%callstack)
   r_stat%total_calls = r_stat%total_calls + 1
   r_stat%active_calls = r_stat%active_calls + 1
   r_stat%stackdepth_accu = r_stat%stackdepth_accu + stack_size + 1

   ! add routine to callstack
   cs_entry%routine_id = routine_id
   CALL list_push(timer_env%callstack, cs_entry)

   !..if debug mode echo the subroutine name
   IF ((timer_env%trace_all .OR. r_stat%trace) .AND. &
       (r_stat%total_calls < timer_env%trace_max)) THEN
      ! Build the trace format with an explicit edit descriptor; the indentation
      ! width grows with the call-stack depth (at least one blank).
      WRITE (sformat, '(A,I0,A)') "(A,A,", MAX(1, 3*stack_size - 4), "X,I4,1X,I6,1X,A,A)"
      WRITE (mystring, sformat) timer_env%trace_str, ">>", stack_size + 1, &
         r_stat%total_calls, TRIM(r_stat%routineN), "       start"
      CALL acc_devmem_info(gpumem_free, gpumem_total)
      CALL m_memory(cpumem)
      ! Host memory is rounded up to whole MiB, GPU memory in use is rounded down
      WRITE (line, '(A,A,I0,A,A,I0,A)') TRIM(mystring), &
         " Hostmem: ", (cpumem + 1024**2 - 1)/1024**2, " MiB", &
         " GPUmem: ", (gpumem_total - gpumem_free)/1024**2, " MiB"
      WRITE (timer_env%trace_unit, *) TRIM(line)
      CALL m_flush(timer_env%trace_unit)
   END IF

   handle = routine_id

   ! Open a matching range in the GPU profiler, when one is compiled in
#if defined( __CUDA_PROFILING )
   CALL cuda_nvtx_range_push(routineN)
#endif
#if defined( __HIP_PROFILING )
   ret = roctxRangePushA(routineN//C_NULL_CHAR)
#endif
!$OMP END MASTER

END SUBROUTINE timeset_handler