collect_reports_from_ranks Subroutine

private subroutine collect_reports_from_ranks(reports, cost_type, mp_env)

Collects the timing or energy reports from all MPI ranks.

Arguments

Type IntentOptional Attributes Name
type(list_routinereport_type), intent(inout) :: reports
integer, intent(in) :: cost_type
type(dbcsr_mp_obj), intent(in) :: mp_env

Source Code

   SUBROUTINE collect_reports_from_ranks(reports, cost_type, mp_env)
      !! Collects the timing or energy reports from all MPI ranks.
      TYPE(list_routinereport_type), INTENT(INOUT)       :: reports
      INTEGER, INTENT(IN)                                :: cost_type
      TYPE(dbcsr_mp_obj), INTENT(IN)                     :: mp_env

      CHARACTER(LEN=default_string_length)               :: routineN
      INTEGER                                            :: local_routine_id, sending_rank
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: collected
      REAL(KIND=dp)                                      :: foobar
      REAL(KIND=dp), DIMENSION(2)                        :: dbuf
      TYPE(routine_report_type), POINTER                 :: r_report
      TYPE(routine_stat_type), POINTER                   :: r_stat
      TYPE(timer_env_type), POINTER                      :: timer_env

      NULLIFY (r_stat, r_report, timer_env)
      IF (.NOT. list_isready(reports)) &
         DBCSR_ABORT("BUG")

      timer_env => get_timer_env()

      ! make sure all functions have been called so that list_size(timer_env%routine_stats)
      ! and the actual dictionary are consistent in the loop below, preventing out of bounds.
      ! this hack makes sure they are called before
      routineN = ""
      CALL mp_bcast(routineN, 0, mp_env%mp%mp_group)
      sending_rank = 0
      CALL mp_max(sending_rank, mp_env%mp%mp_group)
      CALL mp_sum(sending_rank, mp_env%mp%mp_group)
      foobar = 0.0_dp
      CALL mp_max(foobar, mp_env%mp%mp_group)
      dbuf = 0.0_dp
      CALL mp_maxloc(dbuf, mp_env%mp%mp_group)
      CALL mp_sum(foobar, mp_env%mp%mp_group)
      ! end hack

      ! Array collected is used as a bit field.
      ! It's of type integer in order to use the convenient MINLOC routine.
      ALLOCATE (collected(list_size(timer_env%routine_stats)))
      collected(:) = 0

      DO
         ! does any rank have uncollected stats?
         sending_rank = -1
         IF (.NOT. ALL(collected == 1)) sending_rank = mp_env%mp%mynode
         CALL mp_max(sending_rank, mp_env%mp%mp_group)
         IF (sending_rank < 0) EXIT ! every rank got all routines collected
         IF (sending_rank == mp_env%mp%mynode) THEN
            local_routine_id = MINLOC(collected, dim=1)
            r_stat => list_get(timer_env%routine_stats, local_routine_id)
            routineN = r_stat%routineN
         END IF
         CALL mp_bcast(routineN, sending_rank, mp_env%mp%mp_group)

         ! Create new report for routineN
         ALLOCATE (r_report)
         CALL list_push(reports, r_report)
         r_report%routineN = routineN

         ! If routineN was called on local node, add local stats
         IF (dict_haskey(timer_env%routine_names, routineN)) THEN
            local_routine_id = dict_get(timer_env%routine_names, routineN)
            collected(local_routine_id) = 1
            r_stat => list_get(timer_env%routine_stats, local_routine_id)
            r_report%max_total_calls = r_stat%total_calls
            r_report%sum_total_calls = r_stat%total_calls
            r_report%sum_stackdepth = r_stat%stackdepth_accu
            SELECT CASE (cost_type)
            CASE (cost_type_energy)
               r_report%max_icost = r_stat%incl_energy_accu
               r_report%sum_icost = r_stat%incl_energy_accu
               r_report%max_ecost = r_stat%excl_energy_accu
               r_report%sum_ecost = r_stat%excl_energy_accu
            CASE (cost_type_time)
               r_report%max_icost = r_stat%incl_walltime_accu
               r_report%sum_icost = r_stat%incl_walltime_accu
               r_report%max_ecost = r_stat%excl_walltime_accu
               r_report%sum_ecost = r_stat%excl_walltime_accu
            CASE DEFAULT
               DBCSR_ABORT("BUG")
            END SELECT
         END IF

         ! collect stats of routineN via MPI
         CALL mp_max(r_report%max_total_calls, mp_env%mp%mp_group)
         CALL mp_sum(r_report%sum_total_calls, mp_env%mp%mp_group)
         CALL mp_sum(r_report%sum_stackdepth, mp_env%mp%mp_group)

         ! get value and rank of the maximum inclusive cost
         dbuf = (/r_report%max_icost, REAL(mp_env%mp%mynode, KIND=dp)/)
         CALL mp_maxloc(dbuf, mp_env%mp%mp_group)
         r_report%max_icost = dbuf(1)
         r_report%max_irank = INT(dbuf(2))

         CALL mp_sum(r_report%sum_icost, mp_env%mp%mp_group)

         ! get value and rank of the maximum exclusive cost
         dbuf = (/r_report%max_ecost, REAL(mp_env%mp%mynode, KIND=dp)/)
         CALL mp_maxloc(dbuf, mp_env%mp%mp_group)
         r_report%max_ecost = dbuf(1)
         r_report%max_erank = INT(dbuf(2))

         CALL mp_sum(r_report%sum_ecost, mp_env%mp%mp_group)
      END DO

   END SUBROUTINE collect_reports_from_ranks