Prints configuration for DBCSR
Type | Intent | Optional | Attributes | | Name | |
---|---|---|---|---|---|---|
integer, | intent(in) | | | :: | unit_nr | |
SUBROUTINE dbcsr_print_config(unit_nr)
   !! Prints configuration for DBCSR
   !!
   !! Writes one line per configuration entry of `dbcsr_cfg` to `unit_nr`:
   !! a "DBCSR| ..." label, the current value, and (for most entries) the
   !! 4-character tag returned by that entry's `print_source()`.
   !! MPI, OpenMP and accelerator information is appended when available.

   INTEGER, INTENT(IN) :: unit_nr
      !! Output unit to write to; nothing is printed when `unit_nr <= 0`

   CHARACTER(len=default_string_length) :: mm_name

   ! A non-positive unit means "no output requested here".
   IF (unit_nr <= 0) &
      RETURN

   ! Translate the multiplication-driver id into its printable name.
   SELECT CASE (dbcsr_cfg%mm_driver%val)
   CASE (mm_driver_blas); mm_name = mm_name_blas
   CASE (mm_driver_matmul); mm_name = mm_name_matmul
   CASE (mm_driver_smm); mm_name = mm_name_smm
   CASE (mm_driver_xsmm); mm_name = mm_name_xsmm
   CASE DEFAULT
      DBCSR_ABORT("Unknown MM driver")
   END SELECT

   ! The driver name is right-justified in a 40-character field.
   WRITE (UNIT=unit_nr, FMT='(1X,A,T41,A40,A4)') &
      "DBCSR| CPU Multiplication driver", ADJUSTR(mm_name(1:40)), &
      dbcsr_cfg%mm_driver%print_source()

   WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
      "DBCSR| Multrec recursion limit", dbcsr_cfg%multrec_limit%val, &
      dbcsr_cfg%multrec_limit%print_source()

   WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
      "DBCSR| Multiplication stack size", dbcsr_cfg%mm_stack_size%val, &
      dbcsr_cfg%mm_stack_size%print_source()

   ! A non-positive value is reported as "UNLIMITED" instead of a number.
   ! NOTE(review): the two branches use different labels ("Average" vs
   ! "Maximum") for the same setting -- confirm whether this is intended.
   IF (dbcsr_cfg%avg_elements_images%val > 0) THEN
      WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
         "DBCSR| Average elements for images", dbcsr_cfg%avg_elements_images%val, &
         dbcsr_cfg%avg_elements_images%print_source()
   ELSE
      WRITE (UNIT=unit_nr, FMT='(1X,A,T72,A,A4)') &
         "DBCSR| Maximum elements for images", "UNLIMITED", &
         dbcsr_cfg%avg_elements_images%print_source()
   END IF

   WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
      "DBCSR| Multiplicative factor virtual images", dbcsr_cfg%num_mult_images%val, &
      dbcsr_cfg%num_mult_images%print_source()

   WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
      "DBCSR| Use multiplication densification", dbcsr_cfg%mm_dense%val, &
      dbcsr_cfg%mm_dense%print_source()

   WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
      "DBCSR| Multiplication size stacks", dbcsr_cfg%n_stacks%val, &
      dbcsr_cfg%n_stacks%print_source()

   WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
      "DBCSR| Use memory pool for CPU allocation", dbcsr_cfg%use_mempools_cpu%val, &
      dbcsr_cfg%use_mempools_cpu%print_source()

   ! MPI-related settings are only reported when has_mpi is true.
   IF (has_mpi) THEN
      ! Fewer than two layers is reported as "SINGLE".
      ! The source tag must come from num_layers_3D itself (it was previously
      ! taken from use_mempools_cpu by mistake, reporting the wrong origin).
      IF (dbcsr_cfg%num_layers_3D%val < 2) THEN
         WRITE (UNIT=unit_nr, FMT='(1X,A,T75,A,A4)') &
            "DBCSR| Number of 3D layers", "SINGLE", &
            dbcsr_cfg%num_layers_3D%print_source()
      ELSE
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
            "DBCSR| Number of 3D layers", dbcsr_cfg%num_layers_3D%val, &
            dbcsr_cfg%num_layers_3D%print_source()
      END IF

      WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
         "DBCSR| Use MPI memory allocation", dbcsr_cfg%use_mpi_allocator%val, &
         dbcsr_cfg%use_mpi_allocator%print_source()

      WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
         "DBCSR| Use RMA algorithm", dbcsr_cfg%use_mpi_rma%val, &
         dbcsr_cfg%use_mpi_rma%print_source()

      WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
         "DBCSR| Use Communication thread", dbcsr_cfg%use_comm_thread%val, &
         dbcsr_cfg%use_comm_thread%print_source()

      WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
         "DBCSR| Communication thread load", dbcsr_cfg%comm_thread_load%val, &
         dbcsr_cfg%comm_thread_load%print_source()

      ! Report this process' id and the process count on mp_comm_world.
      BLOCK
         INTEGER :: numnodes, mynode
         CALL mp_environ(numnodes, mynode, mp_comm_world)
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11)') &
            "DBCSR| MPI: My process id", mynode
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11)') &
            "DBCSR| MPI: Number of processes", numnodes
      END BLOCK
   END IF

   ! OpenMP thread counts. The "!$" lines are only compiled when OpenMP is
   ! enabled; otherwise both counters keep the -1 sentinel and "<N/A>" is
   ! printed instead of numbers.
   BLOCK
      INTEGER :: numthreads, numthreads_max
      numthreads = -1
      numthreads_max = -1

!$OMP PARALLEL DEFAULT(NONE) SHARED(numthreads, numthreads_max)
!$OMP MASTER
!$    numthreads = omp_get_num_threads()
!$    numthreads_max = omp_get_max_threads()
!$OMP END MASTER
!$OMP END PARALLEL

      IF (numthreads_max > 0) THEN
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11)') &
            "DBCSR| OMP: Current number of threads", numthreads
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11)') &
            "DBCSR| OMP: Max number of threads", numthreads_max
      ELSE
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,A11)') &
            "DBCSR| OMP: Current number of threads", "<N/A>"
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,A11)') &
            "DBCSR| OMP: Max number of threads", "<N/A>"
      END IF
   END BLOCK

   ! Accelerator-related settings are only reported when has_acc is true.
   IF (has_acc) THEN
      WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11)') &
         "DBCSR| ACC: Number of devices/node", dbcsr_acc_get_ndevices()

      WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
         "DBCSR| ACC: Number of stack-buffers per thread", dbcsr_cfg%accdrv_thread_buffers%val, &
         dbcsr_cfg%accdrv_thread_buffers%print_source()

      WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
         "DBCSR| ACC: Avoid driver after busy ", dbcsr_cfg%accdrv_avoid_after_busy%val, &
         dbcsr_cfg%accdrv_avoid_after_busy%print_source()

      WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
         "DBCSR| ACC: Process inhomogeneous stacks", dbcsr_cfg%accdrv_do_inhomogenous%val, &
         dbcsr_cfg%accdrv_do_inhomogenous%print_source()

      WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
         "DBCSR| ACC: Min. flop for processing", dbcsr_cfg%accdrv_min_flop_process%val, &
         dbcsr_cfg%accdrv_min_flop_process%print_source()

      ! Sorting/binning parameters are only shown when stack sorting is on.
      IF (dbcsr_cfg%accdrv_stack_sort%val) THEN
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
            "DBCSR| ACC: Min. flop for sorting", dbcsr_cfg%accdrv_min_flop_sort%val, &
            dbcsr_cfg%accdrv_min_flop_sort%print_source()

         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
            "DBCSR| ACC: Number of binning bins", dbcsr_cfg%accdrv_binning_nbins%val, &
            dbcsr_cfg%accdrv_binning_nbins%print_source()

         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
            "DBCSR| ACC: Size of binning bins", dbcsr_cfg%accdrv_binning_binsize%val, &
            dbcsr_cfg%accdrv_binning_binsize%print_source()
      END IF
   END IF

   WRITE (UNIT=unit_nr, FMT='(1X,A,T74,ES7.1,A4)') &
      "DBCSR| Split modifier for TAS multiplication algorithm", dbcsr_cfg%tas_split_factor%val, &
      dbcsr_cfg%tas_split_factor%print_source()
END SUBROUTINE dbcsr_print_config