Collects the timing or energy reports from all MPI ranks.
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(list_routinereport_type), | intent(inout) | :: | reports | |||
integer, | intent(in) | :: | cost_type | |||
type(dbcsr_mp_obj), | intent(in) | :: | mp_env |
SUBROUTINE collect_reports_from_ranks(reports, cost_type, mp_env) !! Collects the timing or energy reports from all MPI ranks. TYPE(list_routinereport_type), INTENT(INOUT) :: reports INTEGER, INTENT(IN) :: cost_type TYPE(dbcsr_mp_obj), INTENT(IN) :: mp_env CHARACTER(LEN=default_string_length) :: routineN INTEGER :: local_routine_id, sending_rank INTEGER, ALLOCATABLE, DIMENSION(:) :: collected REAL(KIND=dp) :: foobar REAL(KIND=dp), DIMENSION(2) :: dbuf TYPE(routine_report_type), POINTER :: r_report TYPE(routine_stat_type), POINTER :: r_stat TYPE(timer_env_type), POINTER :: timer_env NULLIFY (r_stat, r_report, timer_env) IF (.NOT. list_isready(reports)) & DBCSR_ABORT("BUG") timer_env => get_timer_env() ! make sure all functions have been called so that list_size(timer_env%routine_stats) ! and the actual dictionary are consistent in the loop below, preventing out of bounds. ! this hack makes sure they are called before routineN = "" CALL mp_bcast(routineN, 0, mp_env%mp%mp_group) sending_rank = 0 CALL mp_max(sending_rank, mp_env%mp%mp_group) CALL mp_sum(sending_rank, mp_env%mp%mp_group) foobar = 0.0_dp CALL mp_max(foobar, mp_env%mp%mp_group) dbuf = 0.0_dp CALL mp_maxloc(dbuf, mp_env%mp%mp_group) CALL mp_sum(foobar, mp_env%mp%mp_group) ! end hack ! Array collected is used as a bit field. ! It's of type integer in order to use the convenient MINLOC routine. ALLOCATE (collected(list_size(timer_env%routine_stats))) collected(:) = 0 DO ! does any rank have uncollected stats? sending_rank = -1 IF (.NOT. ALL(collected == 1)) sending_rank = mp_env%mp%mynode CALL mp_max(sending_rank, mp_env%mp%mp_group) IF (sending_rank < 0) EXIT ! every rank got all routines collected IF (sending_rank == mp_env%mp%mynode) THEN local_routine_id = MINLOC(collected, dim=1) r_stat => list_get(timer_env%routine_stats, local_routine_id) routineN = r_stat%routineN END IF CALL mp_bcast(routineN, sending_rank, mp_env%mp%mp_group) ! Create new report for routineN ALLOCATE (r_report) CALL list_push(reports, r_report) r_report%routineN = routineN ! If routineN was called on local node, add local stats IF (dict_haskey(timer_env%routine_names, routineN)) THEN local_routine_id = dict_get(timer_env%routine_names, routineN) collected(local_routine_id) = 1 r_stat => list_get(timer_env%routine_stats, local_routine_id) r_report%max_total_calls = r_stat%total_calls r_report%sum_total_calls = r_stat%total_calls r_report%sum_stackdepth = r_stat%stackdepth_accu SELECT CASE (cost_type) CASE (cost_type_energy) r_report%max_icost = r_stat%incl_energy_accu r_report%sum_icost = r_stat%incl_energy_accu r_report%max_ecost = r_stat%excl_energy_accu r_report%sum_ecost = r_stat%excl_energy_accu CASE (cost_type_time) r_report%max_icost = r_stat%incl_walltime_accu r_report%sum_icost = r_stat%incl_walltime_accu r_report%max_ecost = r_stat%excl_walltime_accu r_report%sum_ecost = r_stat%excl_walltime_accu CASE DEFAULT DBCSR_ABORT("BUG") END SELECT END IF ! collect stats of routineN via MPI CALL mp_max(r_report%max_total_calls, mp_env%mp%mp_group) CALL mp_sum(r_report%sum_total_calls, mp_env%mp%mp_group) CALL mp_sum(r_report%sum_stackdepth, mp_env%mp%mp_group) ! get value and rank of the maximum inclusive cost dbuf = (/r_report%max_icost, REAL(mp_env%mp%mynode, KIND=dp)/) CALL mp_maxloc(dbuf, mp_env%mp%mp_group) r_report%max_icost = dbuf(1) r_report%max_irank = INT(dbuf(2)) CALL mp_sum(r_report%sum_icost, mp_env%mp%mp_group) ! get value and rank of the maximum exclusive cost dbuf = (/r_report%max_ecost, REAL(mp_env%mp%mynode, KIND=dp)/) CALL mp_maxloc(dbuf, mp_env%mp%mp_group) r_report%max_ecost = dbuf(1) r_report%max_erank = INT(dbuf(2)) CALL mp_sum(r_report%sum_ecost, mp_env%mp%mp_group) END DO END SUBROUTINE collect_reports_from_ranks