Prints configuration for DBCSR
Type | Intent | Optional | Attributes | | Name | |
---|---|---|---|---|---|---|
integer, | intent(in) | | | :: | unit_nr | |
SUBROUTINE dbcsr_print_config(unit_nr)
   !! Prints configuration for DBCSR
   !!
   !! Writes one "DBCSR| ..." line per configuration entry of dbcsr_cfg to
   !! the given unit. Most lines end with a 4-character field holding the
   !! result of the entry's own print_source(). MPI-, OpenMP- and
   !! ACC-specific sections are printed depending on has_mpi, the OpenMP
   !! build state and has_acc respectively.

   INTEGER, INTENT(IN) :: unit_nr
      !! Fortran unit to write to; nothing is printed when unit_nr <= 0

   CHARACTER(len=default_string_length) :: mm_name

   ! A non-positive unit means "no output requested".
   IF (unit_nr <= 0) RETURN

   ! Translate the selected multiplication driver into its printable name.
   SELECT CASE (dbcsr_cfg%mm_driver%val)
   CASE (mm_driver_blas); mm_name = mm_name_blas
   CASE (mm_driver_matmul); mm_name = mm_name_matmul
   CASE (mm_driver_smm); mm_name = mm_name_smm
   CASE (mm_driver_xsmm); mm_name = mm_name_xsmm
   CASE DEFAULT
      DBCSR_ABORT("Unknown MM driver")
   END SELECT

   WRITE (UNIT=unit_nr, FMT='(1X,A,T41,A40,A4)') &
      "DBCSR| CPU Multiplication driver", ADJUSTR(mm_name(1:40)), &
      dbcsr_cfg%mm_driver%print_source()

   WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
      "DBCSR| Multrec recursion limit", dbcsr_cfg%multrec_limit%val, &
      dbcsr_cfg%multrec_limit%print_source()
   WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
      "DBCSR| Multiplication stack size", dbcsr_cfg%mm_stack_size%val, &
      dbcsr_cfg%mm_stack_size%print_source()

   ! A non-positive value is reported as "UNLIMITED" instead of a number.
   IF (dbcsr_cfg%avg_elements_images%val > 0) THEN
      WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
         "DBCSR| Average elements for images", dbcsr_cfg%avg_elements_images%val, &
         dbcsr_cfg%avg_elements_images%print_source()
   ELSE
      WRITE (UNIT=unit_nr, FMT='(1X,A,T72,A,A4)') &
         "DBCSR| Maximum elements for images", "UNLIMITED", &
         dbcsr_cfg%avg_elements_images%print_source()
   END IF

   WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
      "DBCSR| Multiplicative factor virtual images", dbcsr_cfg%num_mult_images%val, &
      dbcsr_cfg%num_mult_images%print_source()

   WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
      "DBCSR| Use multiplication densification", dbcsr_cfg%mm_dense%val, &
      dbcsr_cfg%mm_dense%print_source()

   WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
      "DBCSR| Multiplication size stacks", dbcsr_cfg%n_stacks%val, &
      dbcsr_cfg%n_stacks%print_source()

   WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
      "DBCSR| Use memory pool for CPU allocation", dbcsr_cfg%use_mempools_cpu%val, &
      dbcsr_cfg%use_mempools_cpu%print_source()

   IF (has_mpi) THEN
      ! Fewer than two layers is reported as "SINGLE". The source tag must
      ! come from num_layers_3D itself, not from an unrelated entry.
      IF (dbcsr_cfg%num_layers_3D%val < 2) THEN
         WRITE (UNIT=unit_nr, FMT='(1X,A,T75,A,A4)') &
            "DBCSR| Number of 3D layers", "SINGLE", &
            dbcsr_cfg%num_layers_3D%print_source()
      ELSE
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
            "DBCSR| Number of 3D layers", dbcsr_cfg%num_layers_3D%val, &
            dbcsr_cfg%num_layers_3D%print_source()
      END IF

      WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
         "DBCSR| Use MPI memory allocation", dbcsr_cfg%use_mpi_allocator%val, &
         dbcsr_cfg%use_mpi_allocator%print_source()

      WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
         "DBCSR| Use RMA algorithm", dbcsr_cfg%use_mpi_rma%val, &
         dbcsr_cfg%use_mpi_rma%print_source()

      WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
         "DBCSR| Use Communication thread", dbcsr_cfg%use_comm_thread%val, &
         dbcsr_cfg%use_comm_thread%print_source()

      WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
         "DBCSR| Communication thread load", dbcsr_cfg%comm_thread_load%val, &
         dbcsr_cfg%comm_thread_load%print_source()

      BLOCK
         INTEGER :: numnodes, mynode
         CALL mp_environ(numnodes, mynode, mp_comm_world)
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11)') &
            "DBCSR| MPI: My process id", mynode
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11)') &
            "DBCSR| MPI: Number of processes", numnodes
      END BLOCK
   END IF

   BLOCK
      INTEGER :: numthreads, numthreads_max
      ! -1 acts as a sentinel: the "!$" lines below are only compiled with
      ! OpenMP enabled, so without OpenMP both values stay negative and
      ! "<N/A>" is printed instead of thread counts.
      numthreads = -1
      numthreads_max = -1

!$OMP PARALLEL DEFAULT(NONE) SHARED(numthreads, numthreads_max)
!$OMP MASTER
!$    numthreads = omp_get_num_threads()
!$    numthreads_max = omp_get_max_threads()
!$OMP END MASTER
!$OMP END PARALLEL

      IF (numthreads_max > 0) THEN
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11)') &
            "DBCSR| OMP: Current number of threads", numthreads
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11)') &
            "DBCSR| OMP: Max number of threads", numthreads_max
      ELSE
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,A11)') &
            "DBCSR| OMP: Current number of threads", "<N/A>"
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,A11)') &
            "DBCSR| OMP: Max number of threads", "<N/A>"
      END IF
   END BLOCK

   IF (has_acc) THEN
      WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11)') &
         "DBCSR| ACC: Number of devices/node", dbcsr_acc_get_ndevices()
      WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
         "DBCSR| ACC: Number of stack-buffers per thread", dbcsr_cfg%accdrv_thread_buffers%val, &
         dbcsr_cfg%accdrv_thread_buffers%print_source()
      WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
         "DBCSR| ACC: Avoid driver after busy ", dbcsr_cfg%accdrv_avoid_after_busy%val, &
         dbcsr_cfg%accdrv_avoid_after_busy%print_source()
      WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
         "DBCSR| ACC: Process inhomogeneous stacks", dbcsr_cfg%accdrv_do_inhomogenous%val, &
         dbcsr_cfg%accdrv_do_inhomogenous%print_source()
      WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
         "DBCSR| ACC: Min. flop for processing", dbcsr_cfg%accdrv_min_flop_process%val, &
         dbcsr_cfg%accdrv_min_flop_process%print_source()
#if defined(__DBCSR_ACC_G2G)
      WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
         "DBCSR| ACC: Use G2G algorithm", dbcsr_cfg%use_acc_g2g%val, &
         dbcsr_cfg%use_acc_g2g%print_source()
#endif
      ! The sorting/binning parameters are only listed when stack sorting is on.
      IF (dbcsr_cfg%accdrv_stack_sort%val) THEN
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
            "DBCSR| ACC: Min. flop for sorting", dbcsr_cfg%accdrv_min_flop_sort%val, &
            dbcsr_cfg%accdrv_min_flop_sort%print_source()
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
            "DBCSR| ACC: Number of binning bins", dbcsr_cfg%accdrv_binning_nbins%val, &
            dbcsr_cfg%accdrv_binning_nbins%print_source()
         WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
            "DBCSR| ACC: Size of binning bins", dbcsr_cfg%accdrv_binning_binsize%val, &
            dbcsr_cfg%accdrv_binning_binsize%print_source()
      END IF
      WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1,A4)') &
         "DBCSR| ACC: GPU backend is enabled", dbcsr_cfg%run_on_gpu%val, dbcsr_cfg%run_on_gpu%print_source()
   END IF

   WRITE (UNIT=unit_nr, FMT='(1X,A,T74,ES7.1,A4)') &
      "DBCSR| Split modifier for TAS multiplication algorithm", dbcsr_cfg%tas_split_factor%val, &
      dbcsr_cfg%tas_split_factor%print_source()

END SUBROUTINE dbcsr_print_config