# 1 "/__w/dbcsr/dbcsr/src/work/dbcsr_work_operations.F" 1 !--------------------------------------------------------------------------------------------------! ! Copyright (C) by the DBCSR developers group - All rights reserved ! ! This file is part of the DBCSR library. ! ! ! ! For information on the license, see the LICENSE file. ! ! For further information please visit https://dbcsr.cp2k.org ! ! SPDX-License-Identifier: GPL-2.0+ ! !--------------------------------------------------------------------------------------------------! MODULE dbcsr_work_operations !! DBCSR work matrix utilities USE dbcsr_array_types, ONLY: array_data, & array_hold, & array_i1d_obj, & array_new, & array_nullify, & array_release, & array_size USE dbcsr_btree, ONLY: btree_data_cp2d, & btree_data_dp2d, & btree_data_sp2d, & btree_data_zp2d USE dbcsr_block_operations, ONLY: block_copy_c, & block_copy_d, & block_copy_s, & block_copy_z, & dbcsr_data_copy, & dbcsr_data_set USE dbcsr_config, ONLY: default_resize_factor USE dbcsr_data_methods, ONLY: & dbcsr_data_clear_pointer, dbcsr_data_ensure_size, dbcsr_data_get_memory_type, & dbcsr_data_get_size, dbcsr_data_get_size_referenced, dbcsr_data_get_type, dbcsr_data_hold, & dbcsr_data_init, dbcsr_data_new, dbcsr_data_release, dbcsr_data_set_size_referenced, & dbcsr_data_valid, dbcsr_get_data_p_c, dbcsr_get_data_p_d, dbcsr_get_data_p_s, & dbcsr_get_data_p_z USE dbcsr_data_operations, ONLY: dbcsr_data_copyall, & dbcsr_sort_data, & dbcsr_switch_data_area USE dbcsr_dist_methods, ONLY: dbcsr_distribution_has_threads, & dbcsr_distribution_hold, & dbcsr_distribution_make_threads, & dbcsr_distribution_ncols, & dbcsr_distribution_nrows, & dbcsr_distribution_release USE dbcsr_index_operations, ONLY: dbcsr_addto_index_array, & dbcsr_build_row_index, & dbcsr_clearfrom_index_array, & dbcsr_index_prune_deleted, & dbcsr_make_dbcsr_index, & dbcsr_make_index_exist, & dbcsr_repoint_index, & dbcsr_sort_indices USE dbcsr_iterator_operations, ONLY: dbcsr_iterator_blocks_left, & dbcsr_iterator_next_block, & dbcsr_iterator_start, & dbcsr_iterator_stop USE dbcsr_mem_methods, ONLY: dbcsr_memtype_equal USE dbcsr_methods, ONLY: & dbcsr_col_block_sizes, dbcsr_distribution, dbcsr_get_data_memory_type, & dbcsr_get_data_size_used, dbcsr_get_data_type, dbcsr_get_index_memory_type, & dbcsr_get_matrix_type, dbcsr_get_replication_type, dbcsr_matrix_counter, & dbcsr_max_col_size, dbcsr_max_row_size, dbcsr_mutable_destroy, dbcsr_mutable_init, & dbcsr_mutable_instantiated, dbcsr_mutable_new, dbcsr_mutable_release, dbcsr_name, & dbcsr_row_block_sizes, dbcsr_use_mutable, dbcsr_valid_index, dbcsr_wm_use_mutable USE dbcsr_ptr_util, ONLY: ensure_array_size USE dbcsr_toollib, ONLY: dbcsr_unpack_i8_2i4, & sort USE dbcsr_string_utilities, ONLY: uppercase USE dbcsr_types, ONLY: & dbcsr_data_obj, dbcsr_distribution_obj, dbcsr_iterator, dbcsr_memtype_default, & dbcsr_memtype_type, dbcsr_meta_size, dbcsr_num_slots, dbcsr_repl_col, dbcsr_repl_full, & dbcsr_repl_none, dbcsr_repl_row, dbcsr_slot_blk_p, dbcsr_slot_col_i, dbcsr_slot_home_coli, & dbcsr_slot_home_pcol, dbcsr_slot_home_prow, dbcsr_slot_home_rowi, dbcsr_slot_home_vpcol, & dbcsr_slot_home_vprow, dbcsr_slot_nblkcols_local, dbcsr_slot_nblkcols_total, & dbcsr_slot_nblkrows_local, dbcsr_slot_nblkrows_total, dbcsr_slot_nblks, & dbcsr_slot_nfullcols_local, dbcsr_slot_nfullcols_total, dbcsr_slot_nfullrows_local, & dbcsr_slot_nfullrows_total, dbcsr_slot_nze, dbcsr_slot_row_p, dbcsr_slot_size, dbcsr_type, & dbcsr_type_antihermitian, dbcsr_type_antisymmetric, dbcsr_type_complex_4, & dbcsr_type_complex_8, dbcsr_type_hermitian, dbcsr_type_no_symmetry, dbcsr_type_real_4, & dbcsr_type_real_8, dbcsr_type_real_default, dbcsr_type_symmetric, dbcsr_work_type USE dbcsr_dist_util, ONLY: convert_sizes_to_offsets, & dbcsr_calc_block_sizes, & dbcsr_verify_matrix, & meta_from_dist USE dbcsr_kinds, ONLY: default_string_length, & int_8, & real_4, & real_8 #include "base/dbcsr_base_uses.f90" !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads, omp_in_parallel IMPLICIT NONE PRIVATE CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbcsr_work_operations' LOGICAL, PARAMETER :: careful_mod = .FALSE. PUBLIC :: dbcsr_create, dbcsr_work_create, dbcsr_finalize, dbcsr_special_finalize PUBLIC :: dbcsr_add_wm_from_matrix, & add_work_coordinate PUBLIC :: dbcsr_work_destroy INTERFACE dbcsr_create MODULE PROCEDURE dbcsr_create_new, dbcsr_create_template END INTERFACE TYPE i_array_p INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: p => NULL() END TYPE i_array_p CONTAINS SUBROUTINE dbcsr_create_new(matrix, name, dist, matrix_type, & row_blk_size, col_blk_size, row_blk_size_obj, col_blk_size_obj, & nze, data_type, data_buffer, & data_memory_type, index_memory_type, & max_rbs, max_cbs, & row_blk_offset, col_blk_offset, & thread_dist, & reuse, reuse_arrays, mutable_work, make_index, replication_type) !! Creates a matrix, allocating the essentials. !! !! The matrix itself is allocated, as well as the essential parts of !! the index. When passed the nze argument, the data is also allocated !! to that size. !! see dbcsr_types.F TYPE(dbcsr_type), INTENT(INOUT) :: matrix !! new matrix CHARACTER(len=*), INTENT(IN) :: name TYPE(dbcsr_distribution_obj), INTENT(IN) :: dist !! distribution_2d distribution CHARACTER, INTENT(IN) :: matrix_type !! 'N' for normal, 'T' for transposed, 'S' for symmetric, and 'A' for antisymmetric INTEGER, DIMENSION(:), INTENT(INOUT), POINTER, & CONTIGUOUS, OPTIONAL :: row_blk_size, col_blk_size TYPE(array_i1d_obj), INTENT(IN), OPTIONAL :: row_blk_size_obj, col_blk_size_obj INTEGER, INTENT(IN), OPTIONAL :: nze, data_type !! number of elements !! type of data from 'rRcC' for single/double precision real/complex, default is 'R' TYPE(dbcsr_data_obj), INTENT(IN), OPTIONAL :: data_buffer TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL :: data_memory_type, index_memory_type !! allocate indices and data using special memory !! allocate indices using special memory INTEGER, INTENT(IN), OPTIONAL :: max_rbs, max_cbs TYPE(array_i1d_obj), INTENT(IN), OPTIONAL :: row_blk_offset, col_blk_offset TYPE(dbcsr_distribution_obj), INTENT(IN), OPTIONAL :: thread_dist LOGICAL, INTENT(IN), OPTIONAL :: reuse, reuse_arrays, mutable_work, & make_index !! reuses an existing matrix, default is to create a fresh one !! uses the mutable data for working and not the append-only data; default is append-only CHARACTER, INTENT(IN), OPTIONAL :: replication_type !! replication to be used for this matrix; default is dbcsr_repl_none CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_create_new' CHARACTER :: matrix_type_l INTEGER :: handle, my_nze INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: vec_col_blk_offset, vec_row_blk_offset INTEGER, DIMENSION(dbcsr_meta_size) :: new_meta LOGICAL :: hijack, my_make_index ! --------------------------------------------------------------------------- MARK_USED(thread_dist) ! only used with OMP CALL timeset(routineN, handle) ! Reuse matrix only if has actually been allocated. hijack = ASSOCIATED(matrix%index) IF (PRESENT(reuse)) hijack = reuse my_make_index = .TRUE. IF (PRESENT(make_index)) my_make_index = make_index IF (.NOT. hijack) THEN matrix = dbcsr_type() matrix%refcount = 1 END IF !$OMP CRITICAL (crit_counter) matrix%serial_number = dbcsr_matrix_counter dbcsr_matrix_counter = dbcsr_matrix_counter + 1 !$OMP END CRITICAL (crit_counter) ! Mark matrix index as having an invalid index. matrix%valid = .FALSE. matrix%name = name ! Sets the type of matrix building/modifying work structures. IF (PRESENT(mutable_work)) THEN matrix%work_mutable = mutable_work ELSE matrix%work_mutable = .FALSE. END IF ! Sets the correct data type. IF (PRESENT(data_type)) THEN SELECT CASE (data_type) CASE (dbcsr_type_real_4) matrix%data_type = dbcsr_type_real_4 CASE (dbcsr_type_real_8) matrix%data_type = dbcsr_type_real_8 CASE (dbcsr_type_complex_4) matrix%data_type = dbcsr_type_complex_4 CASE (dbcsr_type_complex_8) matrix%data_type = dbcsr_type_complex_8 CASE DEFAULT DBCSR_ABORT("Invalid matrix type") END SELECT ELSE matrix%data_type = dbcsr_type_real_default END IF matrix%data_memory_type = dbcsr_memtype_default IF (PRESENT(data_memory_type)) & matrix%data_memory_type = data_memory_type matrix%index_memory_type = dbcsr_memtype_default IF (PRESENT(index_memory_type)) & matrix%index_memory_type = index_memory_type IF (hijack) THEN ! Release/deallocate elements that are replaced or not needed ! by the new matrix. This is similar to what dbcsr_destroy ! does, except that it keeps the index and data. CALL array_release(matrix%row_blk_size) CALL array_release(matrix%col_blk_size) CALL array_release(matrix%row_blk_offset) CALL array_release(matrix%col_blk_offset) IF (matrix%has_local_rows) & CALL array_release(matrix%local_rows) IF (matrix%has_global_rows) & CALL array_release(matrix%global_rows) IF (matrix%has_local_cols) & CALL array_release(matrix%local_cols) IF (matrix%has_global_cols) & CALL array_release(matrix%global_cols) CALL dbcsr_distribution_release(matrix%dist) IF (ASSOCIATED(matrix%wms)) THEN CALL dbcsr_work_destroy_all(matrix) END IF CALL array_nullify(matrix%local_rows) CALL array_nullify(matrix%global_rows) CALL array_nullify(matrix%local_cols) CALL array_nullify(matrix%global_cols) ! IF (matrix%data_type /= matrix%data_area%d%data_type) & DBCSR_ABORT("Inconsistent data type for the existing buffer.") CALL dbcsr_data_set_size_referenced(matrix%data_area, 0) ELSE ! Invalidate index NULLIFY (matrix%index) ! Invalidate data IF (PRESENT(data_buffer)) THEN IF (.NOT. dbcsr_data_valid(data_buffer)) & DBCSR_ABORT("Input data buffer not valid.") IF (matrix%data_type /= data_buffer%d%data_type) & DBCSR_ABORT("Input buffer data type different by matrix data type.") matrix%data_memory_type = data_buffer%d%memory_type matrix%data_area = data_buffer CALL dbcsr_data_hold(matrix%data_area) ELSE CALL dbcsr_data_init(matrix%data_area) END IF END IF ! These are always invalidated. NULLIFY (matrix%row_p, matrix%col_i, matrix%blk_p, matrix%thr_c, & matrix%coo_l) IF (PRESENT(row_blk_size_obj)) THEN matrix%row_blk_size = row_blk_size_obj CALL array_hold(matrix%row_blk_size) ELSEIF (PRESENT(row_blk_size)) THEN CALL array_new(matrix%row_blk_size, row_blk_size, gift=reuse_arrays) ELSE DBCSR_ABORT("Missing row_blk_size") END IF IF (PRESENT(max_rbs)) THEN matrix%max_rbs = max_rbs ELSE IF (array_size(matrix%row_blk_size) .GT. 0) THEN matrix%max_rbs = MAXVAL(array_data(matrix%row_blk_size)) ELSE matrix%max_rbs = 0 END IF IF (PRESENT(col_blk_size_obj)) THEN matrix%col_blk_size = col_blk_size_obj CALL array_hold(matrix%col_blk_size) ELSEIF (PRESENT(col_blk_size)) THEN CALL array_new(matrix%col_blk_size, col_blk_size, gift=reuse_arrays) ELSE DBCSR_ABORT("Missing col_blk_size") END IF IF (PRESENT(max_cbs)) THEN matrix%max_cbs = max_cbs ELSE IF (array_size(matrix%col_blk_size) .GT. 0) THEN matrix%max_cbs = MAXVAL(array_data(matrix%col_blk_size)) ELSE matrix%max_cbs = 0 END IF ! IF (array_size(matrix%row_blk_size) /= dbcsr_distribution_nrows(dist)) & DBCSR_ABORT("Number of blocked rows does match blocked row distribution.") IF (array_size(matrix%col_blk_size) /= dbcsr_distribution_ncols(dist)) & DBCSR_ABORT("Number of blocked columns does match blocked column distribution.") ! initialize row/col offsets IF (PRESENT(row_blk_offset)) THEN IF (dbcsr_distribution_nrows(dist) + 1 /= array_size(row_blk_offset)) & CALL dbcsr_abort(__LOCATION__, & "Number of blocked offset rows does match blocked row distribution.") matrix%row_blk_offset = row_blk_offset CALL array_hold(matrix%row_blk_offset) ELSE ALLOCATE (vec_row_blk_offset(array_size(matrix%row_blk_size) + 1)) CALL convert_sizes_to_offsets(array_data(matrix%row_blk_size), vec_row_blk_offset) CALL array_new(matrix%row_blk_offset, vec_row_blk_offset, gift=.TRUE.) END IF IF (PRESENT(col_blk_offset)) THEN IF (dbcsr_distribution_ncols(dist) + 1 /= array_size(col_blk_offset)) & CALL dbcsr_abort(__LOCATION__, & "Number of blocked offset columns does match blocked column distribution.") matrix%col_blk_offset = col_blk_offset CALL array_hold(matrix%col_blk_offset) ELSE ALLOCATE (vec_col_blk_offset(array_size(matrix%col_blk_size) + 1)) CALL convert_sizes_to_offsets(array_data(matrix%col_blk_size), vec_col_blk_offset) CALL array_new(matrix%col_blk_offset, vec_col_blk_offset, gift=.TRUE.) END IF matrix%dist = dist CALL dbcsr_distribution_hold(matrix%dist) !$ IF (.NOT. dbcsr_distribution_has_threads(matrix%dist) .AND. PRESENT(thread_dist)) THEN !$ IF (dbcsr_distribution_has_threads(thread_dist)) THEN !$ matrix%dist%d%num_threads = thread_dist%d%num_threads !$ matrix%dist%d%has_thread_dist = .TRUE. !$ matrix%dist%d%thread_dist = thread_dist%d%thread_dist !$ CALL array_hold(matrix%dist%d%thread_dist) !$ END IF !$ END IF !$ IF (.NOT. dbcsr_distribution_has_threads(matrix%dist)) THEN !$ CALL dbcsr_distribution_make_threads(matrix%dist, & !$ array_data(matrix%row_blk_size)) !$ END IF ! Set up some data. IF (my_make_index) THEN CALL meta_from_dist(new_meta, dist, array_data(matrix%row_blk_size), & array_data(matrix%col_blk_size)) matrix%nblkrows_total = new_meta(dbcsr_slot_nblkrows_total) matrix%nblkcols_total = new_meta(dbcsr_slot_nblkcols_total) matrix%nfullrows_total = new_meta(dbcsr_slot_nfullrows_total) matrix%nfullcols_total = new_meta(dbcsr_slot_nfullcols_total) matrix%nblkrows_local = new_meta(dbcsr_slot_nblkrows_local) matrix%nblkcols_local = new_meta(dbcsr_slot_nblkcols_local) matrix%nfullrows_local = new_meta(dbcsr_slot_nfullrows_local) matrix%nfullcols_local = new_meta(dbcsr_slot_nfullcols_local) END IF my_nze = 0; IF (PRESENT(nze)) my_nze = nze matrix%nblks = 0 matrix%nze = 0 IF (PRESENT(replication_type)) THEN IF (replication_type .NE. dbcsr_repl_none & .AND. replication_type .NE. dbcsr_repl_full & .AND. replication_type .NE. dbcsr_repl_row & .AND. replication_type .NE. dbcsr_repl_col) & DBCSR_ABORT("Invalid replication type '"//replication_type//"'") IF (replication_type .EQ. dbcsr_repl_row .OR. replication_type .EQ. dbcsr_repl_col) & DBCSR_WARN("Row and column replication not fully supported") matrix%replication_type = replication_type ELSE matrix%replication_type = dbcsr_repl_none END IF ! ! Setup a matrix from scratch IF (.NOT. hijack) THEN IF (.NOT. PRESENT(data_buffer)) THEN CALL dbcsr_data_new(matrix%data_area, matrix%data_type, my_nze, & memory_type=matrix%data_memory_type) CALL dbcsr_data_set_size_referenced(matrix%data_area, 0) END IF ! IF (my_make_index) THEN NULLIFY (matrix%index) CALL ensure_array_size(matrix%index, lb=1, ub=dbcsr_num_slots, & zero_pad=.TRUE., memory_type=matrix%index_memory_type) END IF END IF IF (my_make_index) THEN IF (LBOUND(matrix%index, 1) .GT. 1 & .OR. UBOUND(matrix%index, 1) .LT. dbcsr_num_slots) & DBCSR_ABORT("Index is not large enough") matrix%index(1:dbcsr_num_slots) = 0 matrix%index(1:dbcsr_meta_size) = new_meta(1:dbcsr_meta_size) matrix%index(dbcsr_slot_size) = dbcsr_num_slots END IF ! matrix%symmetry = .FALSE. matrix%negate_real = .FALSE. matrix%negate_imaginary = .FALSE. !matrix%transpose = .FALSE. matrix_type_l = matrix_type CALL uppercase(matrix_type_l) SELECT CASE (matrix_type_l) CASE (dbcsr_type_no_symmetry) CASE (dbcsr_type_symmetric) matrix%symmetry = .TRUE. CASE (dbcsr_type_antisymmetric) matrix%symmetry = .TRUE. matrix%negate_real = .TRUE. matrix%negate_imaginary = .TRUE. CASE (dbcsr_type_hermitian) matrix%symmetry = .TRUE. matrix%negate_imaginary = .TRUE. CASE (dbcsr_type_antihermitian) matrix%symmetry = .TRUE. matrix%negate_real = .TRUE. CASE DEFAULT DBCSR_ABORT("Invalid matrix type.") END SELECT matrix%bcsc = .FALSE. matrix%local_indexing = .FALSE. matrix%list_indexing = .FALSE. CALL array_nullify(matrix%local_rows) CALL array_nullify(matrix%global_rows) CALL array_nullify(matrix%local_cols) CALL array_nullify(matrix%global_cols) matrix%has_local_rows = .FALSE. matrix%has_global_rows = .FALSE. matrix%has_local_cols = .FALSE. matrix%has_global_cols = .FALSE. IF (my_make_index) THEN CALL dbcsr_make_index_exist(matrix) END IF matrix%valid = .TRUE. CALL timestop(handle) END SUBROUTINE dbcsr_create_new SUBROUTINE dbcsr_create_template(matrix, template, name, dist, matrix_type, & row_blk_size, col_blk_size, row_blk_size_obj, col_blk_size_obj, & nze, data_type, & data_buffer, data_memory_type, index_memory_type, & max_rbs, max_cbs, & row_blk_offset, col_blk_offset, & reuse_arrays, mutable_work, make_index, replication_type) TYPE(dbcsr_type), INTENT(INOUT) :: matrix TYPE(dbcsr_type), INTENT(IN) :: template CHARACTER(len=*), INTENT(IN), OPTIONAL :: name TYPE(dbcsr_distribution_obj), INTENT(IN), OPTIONAL :: dist CHARACTER, INTENT(IN), OPTIONAL :: matrix_type INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL, & POINTER, CONTIGUOUS :: row_blk_size, col_blk_size TYPE(array_i1d_obj), INTENT(IN), OPTIONAL :: row_blk_size_obj, col_blk_size_obj INTEGER, INTENT(IN), OPTIONAL :: nze, data_type TYPE(dbcsr_data_obj), INTENT(IN), OPTIONAL :: data_buffer TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL :: data_memory_type, index_memory_type INTEGER, INTENT(IN), OPTIONAL :: max_rbs, max_cbs TYPE(array_i1d_obj), INTENT(IN), OPTIONAL :: row_blk_offset, col_blk_offset LOGICAL, INTENT(IN), OPTIONAL :: reuse_arrays, mutable_work, make_index CHARACTER, INTENT(IN), OPTIONAL :: replication_type CHARACTER :: new_matrix_type, new_replication_type CHARACTER(len=default_string_length) :: new_name INTEGER :: new_data_type, new_max_cbs, new_max_rbs LOGICAL :: my_make_index, new_mutable_work TYPE(array_i1d_obj) :: new_col_blk_offset, new_row_blk_offset TYPE(dbcsr_distribution_obj) :: new_dist TYPE(dbcsr_memtype_type) :: new_data_memory_type, & new_index_memory_type INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: blk_size ! --------------------------------------------------------------------------- IF (PRESENT(name)) THEN new_name = TRIM(name) ELSE new_name = TRIM(dbcsr_name(template)) END IF IF (PRESENT(dist)) THEN new_dist = dist ELSE new_dist = dbcsr_distribution(template) END IF IF (PRESENT(matrix_type)) THEN new_matrix_type = matrix_type ELSE new_matrix_type = dbcsr_get_matrix_type(template) END IF ! IF ((PRESENT(row_blk_size) .NEQV. PRESENT(col_blk_size)) .OR. & (PRESENT(row_blk_size_obj) .NEQV. PRESENT(col_blk_size_obj))) THEN DBCSR_ABORT("Both row_blk_size and col_blk_size must be provided") END IF ! IF (PRESENT(max_rbs)) new_max_rbs = max_rbs IF (PRESENT(row_blk_offset)) new_row_blk_offset = row_blk_offset NULLIFY (blk_size) IF (PRESENT(row_blk_size_obj)) THEN blk_size => array_data(row_blk_size_obj) ELSEIF (PRESENT(row_blk_size)) THEN blk_size => row_blk_size END IF IF (ASSOCIATED(blk_size)) THEN IF (.NOT. PRESENT(max_rbs)) & new_max_rbs = MAXVAL(blk_size) ELSE IF (.NOT. PRESENT(max_rbs)) & new_max_rbs = dbcsr_max_row_size(template) IF (.NOT. PRESENT(row_blk_offset)) & new_row_blk_offset = template%row_blk_offset END IF ! IF (PRESENT(max_cbs)) new_max_cbs = max_cbs IF (PRESENT(col_blk_offset)) new_col_blk_offset = col_blk_offset NULLIFY (blk_size) IF (PRESENT(col_blk_size_obj)) THEN blk_size => array_data(col_blk_size_obj) ELSEIF (PRESENT(col_blk_size)) THEN blk_size => col_blk_size END IF IF (ASSOCIATED(blk_size)) THEN IF (.NOT. PRESENT(max_cbs)) & new_max_cbs = MAXVAL(blk_size) ELSE IF (.NOT. PRESENT(max_cbs)) & new_max_cbs = dbcsr_max_col_size(template) IF (.NOT. PRESENT(col_blk_offset)) & new_col_blk_offset = template%col_blk_offset END IF IF (PRESENT(data_type)) THEN new_data_type = data_type ELSE new_data_type = dbcsr_get_data_type(template) END IF IF (PRESENT(data_memory_type)) THEN new_data_memory_type = data_memory_type ELSE new_data_memory_type = dbcsr_get_data_memory_type(template) END IF IF (PRESENT(index_memory_type)) THEN new_index_memory_type = index_memory_type ELSE new_index_memory_type = dbcsr_get_index_memory_type(template) END IF IF (PRESENT(replication_type)) THEN new_replication_type = replication_type ELSE new_replication_type = dbcsr_get_replication_type(template) END IF IF (PRESENT(mutable_work)) THEN new_mutable_work = mutable_work ELSE new_mutable_work = dbcsr_use_mutable(template) END IF IF (PRESENT(row_blk_size_obj)) THEN CALL dbcsr_create(matrix, name=new_name, dist=new_dist, & matrix_type=new_matrix_type, & row_blk_size_obj=row_blk_size_obj, & col_blk_size_obj=col_blk_size_obj, & nze=nze, & data_type=new_data_type, & data_buffer=data_buffer, & data_memory_type=new_data_memory_type, & index_memory_type=new_index_memory_type, & max_rbs=new_max_rbs, max_cbs=new_max_cbs, & row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset, & reuse_arrays=reuse_arrays, & mutable_work=new_mutable_work, & make_index=make_index, & replication_type=new_replication_type) ELSEIF (PRESENT(row_blk_size)) THEN CALL dbcsr_create(matrix, name=new_name, dist=new_dist, & matrix_type=new_matrix_type, & row_blk_size=row_blk_size, & col_blk_size=col_blk_size, & nze=nze, & data_type=new_data_type, & data_buffer=data_buffer, & data_memory_type=new_data_memory_type, & index_memory_type=new_index_memory_type, & max_rbs=new_max_rbs, max_cbs=new_max_cbs, & row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset, & reuse_arrays=reuse_arrays, & mutable_work=new_mutable_work, & make_index=make_index, & replication_type=new_replication_type) ELSE CALL dbcsr_create(matrix, name=new_name, dist=new_dist, & matrix_type=new_matrix_type, & row_blk_size_obj=template%row_blk_size, & col_blk_size_obj=template%col_blk_size, & nze=nze, & data_type=new_data_type, & data_buffer=data_buffer, & data_memory_type=new_data_memory_type, & index_memory_type=new_index_memory_type, & max_rbs=new_max_rbs, max_cbs=new_max_cbs, & row_blk_offset=new_row_blk_offset, col_blk_offset=new_col_blk_offset, & thread_dist=dbcsr_distribution(template), & reuse_arrays=reuse_arrays, & mutable_work=new_mutable_work, & make_index=make_index, & replication_type=new_replication_type) END IF ! Copy stuff from the meta-array. These are not normally needed, ! but have to be here for creating matrices from "image" matrices. my_make_index = .TRUE. IF (PRESENT(make_index)) my_make_index = make_index IF (my_make_index) THEN matrix%index(dbcsr_slot_home_prow) = template%index(dbcsr_slot_home_prow) matrix%index(dbcsr_slot_home_rowi) = template%index(dbcsr_slot_home_rowi) matrix%index(dbcsr_slot_home_pcol) = template%index(dbcsr_slot_home_pcol) matrix%index(dbcsr_slot_home_coli) = template%index(dbcsr_slot_home_coli) matrix%index(dbcsr_slot_home_vprow) = template%index(dbcsr_slot_home_vprow) matrix%index(dbcsr_slot_home_vpcol) = template%index(dbcsr_slot_home_vpcol) END IF IF (PRESENT(row_blk_size) .AND. .NOT. PRESENT(row_blk_offset)) THEN CALL array_release(new_row_blk_offset) END IF IF (PRESENT(col_blk_size) .AND. .NOT. PRESENT(col_blk_offset)) THEN CALL array_release(new_col_blk_offset) END IF END SUBROUTINE dbcsr_create_template SUBROUTINE dbcsr_init_wm(wm, data_type, nblks_guess, sizedata_guess, memory_type) !! Initializes one work matrix TYPE(dbcsr_work_type), INTENT(OUT) :: wm !! initialized work matrix INTEGER, INTENT(IN) :: data_type INTEGER, INTENT(IN), OPTIONAL :: nblks_guess, sizedata_guess !! estimated number of blocks !! estimated size of data TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL :: memory_type CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_init_wm' INTEGER :: handle, nblks, stat ! --------------------------------------------------------------------------- IF (careful_mod) CALL timeset(routineN, handle) wm%lastblk = 0 wm%datasize = 0 ! Index IF (PRESENT(nblks_guess)) THEN nblks = nblks_guess IF (nblks_guess < 0) & DBCSR_ABORT("Can not have negative block count.") ALLOCATE (wm%row_i(nblks), stat=stat) IF (stat /= 0) DBCSR_ABORT("wm%row_i") ALLOCATE (wm%col_i(nblks), stat=stat) IF (stat /= 0) DBCSR_ABORT("wm%col_i") ALLOCATE (wm%blk_p(nblks), stat=stat) IF (stat /= 0) DBCSR_ABORT("wm%blk_p") ELSE NULLIFY (wm%row_i, wm%col_i, wm%blk_p) !nblks = CEILING (REAL (matrix%nblkrows_local * matrix%nblkcols_local)& ! / REAL (dbcsr_mp_numnodes (dbcsr_distribution_mp (matrix%dist)))) END IF ! Data CALL dbcsr_data_init(wm%data_area) IF (PRESENT(sizedata_guess)) THEN IF (sizedata_guess < 0) & DBCSR_ABORT("Can not have negative data size.") CALL dbcsr_data_new(wm%data_area, data_type, & data_size=sizedata_guess, memory_type=memory_type) ELSE CALL dbcsr_data_new(wm%data_area, data_type, memory_type=memory_type) END IF CALL dbcsr_mutable_init(wm%mutable) IF (careful_mod) CALL timestop(handle) END SUBROUTINE dbcsr_init_wm SUBROUTINE dbcsr_work_create(matrix, nblks_guess, sizedata_guess, n, & work_mutable, memory_type) !! Creates a the working matrix(es) for a DBCSR matrix. TYPE(dbcsr_type), INTENT(INOUT) :: matrix !! new matrix INTEGER, INTENT(IN), OPTIONAL :: nblks_guess, sizedata_guess, n !! estimated number of blocks !! estimated size of data !! number work matrices to create, default is 1 LOGICAL, INTENT(in), OPTIONAL :: work_mutable !! use mutable work type, default is what was specified in create TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL :: memory_type CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_create' INTEGER :: handle, iw, nw, ow LOGICAL :: wms_new, wms_realloc TYPE(dbcsr_work_type), DIMENSION(:), POINTER :: wms ! --------------------------------------------------------------------------- CALL timeset(routineN, handle) IF (PRESENT(n)) THEN nw = n ELSE nw = 1 !$ IF (omp_in_parallel()) THEN !$ nw = omp_get_num_threads() !$ ELSE !$ nw = omp_get_max_threads() !$ END IF END IF !$OMP MASTER wms_new = .NOT. ASSOCIATED(matrix%wms) wms_realloc = .FALSE. IF (ASSOCIATED(matrix%wms)) THEN ow = SIZE(matrix%wms) IF (ow .LT. nw) & DBCSR_WARN("Number of work matrices less than threads.") IF (ow .LT. nw) wms_realloc = .TRUE. END IF IF (PRESENT(work_mutable)) THEN matrix%work_mutable = work_mutable END IF IF (wms_realloc) THEN ALLOCATE (wms(nw)) wms(1:ow) = matrix%wms(1:ow) DEALLOCATE (matrix%wms) matrix%wms => wms DO iw = ow + 1, nw CALL dbcsr_init_wm(matrix%wms(iw), matrix%data_type, & nblks_guess=nblks_guess, sizedata_guess=sizedata_guess, & memory_type=memory_type) IF (matrix%work_mutable) & CALL dbcsr_mutable_new(matrix%wms(iw)%mutable, & dbcsr_get_data_type(matrix)) END DO END IF IF (wms_new) THEN ALLOCATE (matrix%wms(nw)) DO iw = 1, nw CALL dbcsr_init_wm(matrix%wms(iw), matrix%data_type, & nblks_guess=nblks_guess, sizedata_guess=sizedata_guess, & memory_type=memory_type) IF (matrix%work_mutable) & CALL dbcsr_mutable_new(matrix%wms(iw)%mutable, & dbcsr_get_data_type(matrix)) END DO END IF matrix%valid = .FALSE. !$OMP END MASTER CALL timestop(handle) END SUBROUTINE dbcsr_work_create SUBROUTINE dbcsr_finalize(matrix, reshuffle) !! Creates the final dbcsr_type matrix from the working matrix. !! Work matrices (array or tree-based) are merged into the base DBCSR matrix. !! If a matrix is marked as having a valid index, then nothing is done. !! Deleted blocks are pruned from the index. TYPE(dbcsr_type), INTENT(INOUT) :: matrix !! final matrix LOGICAL, INTENT(IN), OPTIONAL :: reshuffle !! whether the data should be reshuffled, default is false CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_finalize' LOGICAL, PARAMETER :: dbg = .FALSE. INTEGER :: handle, i, nblks, nwms, start_offset INTEGER, ALLOCATABLE, DIMENSION(:) :: empty_row_p INTEGER, DIMENSION(:), POINTER, SAVE :: old_blk_p, old_col_i, old_row_p LOGICAL :: can_quick, fake_row_p, sort_data, spawn ! --------------------------------------------------------------------------- CALL timeset(routineN, handle) !$OMP MASTER NULLIFY (old_blk_p, old_col_i, old_row_p) !$OMP END MASTER !$OMP BARRIER ! If the matrix is not marked as dirty then skip the work. IF (dbcsr_valid_index(matrix)) THEN !"No need to finalize a valid matrix, skipping." ! ! A matrix with a valid index should not have associated work ! arrays. This may happen when this routine is called on a ! matrix that was not changed. !$OMP BARRIER !$OMP MASTER IF (ASSOCIATED(matrix%wms)) & CALL dbcsr_work_destroy_all(matrix) matrix%valid = .TRUE. !$OMP END MASTER !$OMP BARRIER CALL timestop(handle) RETURN END IF ! ! If possible, data copying is avoided. IF (PRESENT(reshuffle)) THEN sort_data = reshuffle ELSE sort_data = .FALSE. END IF ! ! Now make sure that a valid row_p exists. Also clear the row_p if ! the matrix is declared to have 0 blocks. !$OMP MASTER fake_row_p = .NOT. ASSOCIATED(matrix%row_p) IF (ASSOCIATED(matrix%row_p)) THEN fake_row_p = SIZE(matrix%row_p) .LE. 1 END IF fake_row_p = fake_row_p .OR. matrix%nblks .EQ. 0 !$OMP END MASTER ! ! See where data will be appended in the main data ! area. Alternatively, set to the start if the matrix is declared ! to have no data. (This value is ignored if reshuffle is true ! because the main data area is always new.) start_offset = matrix%nze i = dbcsr_get_data_size_used(matrix) !$OMP MASTER matrix%nze = 0 !$OMP END MASTER !$OMP BARRIER !$OMP ATOMIC matrix%nze = matrix%nze + i !$OMP BARRIER IF (dbg) THEN WRITE (*, *) routineN//" sizes", matrix%nze, i, & dbcsr_data_get_size_referenced(matrix%data_area), & dbcsr_data_get_size(matrix%data_area) END IF IF (.FALSE. .AND. dbcsr_data_get_size_referenced(matrix%data_area) .NE. & matrix%nze) THEN IF (matrix%nze .NE. dbcsr_data_get_size_referenced(matrix%data_area)) & DBCSR_WARN("Should reshuffle.") IF (ASSOCIATED(matrix%wms)) THEN sort_data = .NOT. dbcsr_wm_use_mutable(matrix%wms(1)) END IF END IF IF (sort_data .AND. matrix%nze .GT. 0) THEN CALL dbcsr_add_wm_from_matrix(matrix) matrix%nze = 0 !$OMP MASTER fake_row_p = .TRUE. !$OMP END MASTER END IF start_offset = dbcsr_data_get_size_referenced(matrix%data_area) + 1 IF (matrix%nze .EQ. 0) start_offset = 1 !$OMP MASTER matrix%index(dbcsr_slot_nze) = matrix%nze IF (fake_row_p) THEN ALLOCATE (empty_row_p(matrix%nblkrows_total + 1)) empty_row_p(:) = 0 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, & DATA=empty_row_p, extra=0) CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, & reservation=0) CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & reservation=0) CALL dbcsr_repoint_index(matrix) END IF !$OMP END MASTER ! !$OMP BARRIER can_quick = can_quickly_finalize(matrix) !$OMP BARRIER ! If the matrix, work matrices, and environment fit several ! criteria, then a quick O(1) finalization is performed. IF (can_quick .AND. .NOT. sort_data) THEN CALL quick_finalize(matrix) ELSE ! !$OMP MASTER ! ! Create work matrices if not yet existing IF (.NOT. ASSOCIATED(matrix%wms)) THEN nwms = 1 !$ nwms = omp_get_num_threads() CALL dbcsr_work_create(matrix, n=nwms) END IF !$OMP END MASTER !$OMP BARRIER ! ! Ensure index arrays at least exist. !$OMP DO SCHEDULE (STATIC, 1) DO i = 1, SIZE(matrix%wms) IF (.NOT. ASSOCIATED(matrix%wms(i)%row_i)) THEN CALL ensure_array_size(matrix%wms(i)%row_i, ub=0) END IF IF (.NOT. ASSOCIATED(matrix%wms(i)%col_i)) THEN CALL ensure_array_size(matrix%wms(i)%col_i, ub=0) END IF IF (.NOT. ASSOCIATED(matrix%wms(i)%blk_p)) THEN CALL ensure_array_size(matrix%wms(i)%blk_p, ub=0) END IF END DO !$OMP ENDDO ! ! Check for deleted blocks !$OMP MASTER nblks = matrix%row_p(matrix%nblkrows_total + 1) IF (ANY(matrix%blk_p(1:nblks) .EQ. 0)) THEN CALL dbcsr_index_prune_deleted(matrix) END IF old_row_p => matrix%row_p old_col_i => matrix%col_i old_blk_p => matrix%blk_p !$OMP END MASTER ! !$OMP BARRIER ! Check to see if we will need to create a parallel environment ! (needed when there are multiple work matrices but we are not ! in an OpenMP parallel section.) ! ! A parallel section is created and used when the matrix has ! more work matrices. It's a shortcut when the finalize is ! called from a non-parallel environment whereas the matrix was ! built/modified in a parallel environment nwms = SIZE(matrix%wms) spawn = .FALSE. !$ IF (.NOT. omp_in_parallel()) THEN !$ IF (nwms .GT. 1) spawn = .TRUE. !$ END IF IF (spawn) THEN !$OMP PARALLEL IF (spawn) & !$OMP DEFAULT (NONE) & !$OMP SHARED (matrix, old_row_p, old_col_i, old_blk_p,& !$OMP start_offset, sort_data) CALL dbcsr_merge_all(matrix, & old_row_p, old_col_i, old_blk_p, & sort_data=sort_data) !$OMP END PARALLEL ELSE CALL dbcsr_merge_all(matrix, & old_row_p, old_col_i, old_blk_p, & sort_data=sort_data) END IF END IF !$OMP BARRIER !$OMP MASTER ! Clean up. IF (ASSOCIATED(matrix%wms)) THEN CALL dbcsr_work_destroy_all(matrix) END IF matrix%valid = .TRUE. !$OMP END MASTER !$OMP BARRIER IF (dbg) THEN !$OMP SINGLE CALL dbcsr_verify_matrix(matrix) !$OMP END SINGLE END IF !$OMP MASTER IF (fake_row_p) THEN DEALLOCATE (empty_row_p) END IF !$OMP END MASTER !$OMP BARRIER CALL timestop(handle) END SUBROUTINE dbcsr_finalize SUBROUTINE dbcsr_special_finalize(matrix, reshuffle) TYPE(dbcsr_type), INTENT(INOUT) :: matrix LOGICAL, INTENT(IN), OPTIONAL :: reshuffle CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_special_finalize' LOGICAL, PARAMETER :: dbg = .FALSE. INTEGER :: handle LOGICAL :: can_quick, sort_data ! --------------------------------------------------------------------------- CALL timeset(routineN, handle) IF (matrix%nblks .NE. 0) & DBCSR_ABORT("Optimized finalize requires empty matrix.") IF (dbcsr_valid_index(matrix)) & DBCSR_ABORT("Optimized finalize requires invalid matrix.") IF (.NOT. ASSOCIATED(matrix%wms)) & DBCSR_ABORT("Optimized finalize requires work matrices exist.") IF (SIZE(matrix%wms) .NE. 1) & DBCSR_ABORT("Optimized finalize requires single work matrix") ! ! If possible, data copying is avoided. IF (PRESENT(reshuffle)) THEN sort_data = reshuffle ELSE sort_data = .FALSE. END IF !$OMP BARRIER can_quick = can_quickly_finalize(matrix) !$OMP BARRIER ! If the matrix, work matrices, and environment fit several ! criteria, then a quick O(1) finalization is performed. IF (can_quick .AND. .NOT. sort_data) THEN CALL quick_finalize(matrix) ELSE IF (.NOT. sort_data) & DBCSR_ABORT("merge_single_wm only supports data sorting") ! ! Ensure needed index arrays at least exist. !$OMP MASTER ! IF (.NOT. ASSOCIATED(matrix%wms(1)%row_i)) THEN CALL ensure_array_size(matrix%wms(1)%row_i, ub=0) END IF IF (.NOT. ASSOCIATED(matrix%wms(1)%col_i)) THEN CALL ensure_array_size(matrix%wms(1)%col_i, ub=0) END IF IF (.NOT. ASSOCIATED(matrix%wms(1)%blk_p)) THEN CALL ensure_array_size(matrix%wms(1)%blk_p, ub=0) END IF ! !$OMP END MASTER !$OMP BARRIER ! !$OMP PARALLEL DEFAULT (NONE), SHARED(matrix) CALL dbcsr_merge_single_wm(matrix) !$OMP END PARALLEL END IF !$OMP MASTER ! Clean up. IF (ASSOCIATED(matrix%wms)) THEN CALL dbcsr_work_destroy_all(matrix) END IF matrix%valid = .TRUE. !$OMP END MASTER !$OMP BARRIER IF (dbg) THEN !$OMP SINGLE CALL dbcsr_verify_matrix(matrix) !$OMP END SINGLE END IF !$OMP BARRIER CALL timestop(handle) END SUBROUTINE dbcsr_special_finalize FUNCTION can_quickly_finalize(matrix) RESULT(quick) !! Checks whether the matrix can be finalized with minimal copying. TYPE(dbcsr_type), INTENT(IN) :: matrix !! matrix to check LOGICAL :: quick !! whether matrix can be quickly finalized ! --------------------------------------------------------------------------- IF (ASSOCIATED(matrix%wms)) THEN quick = matrix%nblks .EQ. 0 quick = quick .AND. SIZE(matrix%wms) .EQ. 1 .AND. & .NOT. dbcsr_wm_use_mutable(matrix%wms(1)) IF (quick) THEN quick = quick .AND. & dbcsr_memtype_equal( & dbcsr_data_get_memory_type(matrix%wms(1)%data_area), & dbcsr_data_get_memory_type(matrix%data_area)) quick = quick .AND. & ASSOCIATED(matrix%wms(1)%row_i) quick = quick .AND. & (matrix%wms(1)%datasize_after_filtering .LT. 0 .OR. & matrix%wms(1)%datasize .EQ. matrix%wms(1)%datasize_after_filtering) END IF ELSE quick = .FALSE. END IF END FUNCTION can_quickly_finalize SUBROUTINE quick_finalize(matrix) !! Performs quick finalization of matrix !! The data area from the work matrix is accepted as the new matrix's data !! area and the index is built from the work matrix. TYPE(dbcsr_type), INTENT(INOUT) :: matrix !! matrix to finalize CHARACTER(len=*), PARAMETER :: routineN = 'quick_finalize' INTEGER :: error_handle, nblks, nrows ! --------------------------------------------------------------------------- CALL timeset(routineN, error_handle) !$OMP SECTIONS !$OMP SECTION nblks = matrix%wms(1)%lastblk nrows = matrix%nblkrows_total CALL dbcsr_sort_indices(nblks, & matrix%wms(1)%row_i, & matrix%wms(1)%col_i, & matrix%wms(1)%blk_p) CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p) CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i) CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p) CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, & reservation=nrows + 1, extra=2*nblks) CALL dbcsr_make_dbcsr_index(matrix%row_p, matrix%wms(1)%row_i, & nrows, nblks) CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, & DATA=matrix%wms(1)%col_i(1:nblks)) CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & DATA=matrix%wms(1)%blk_p(1:nblks)) matrix%nblks = nblks matrix%nze = matrix%wms(1)%datasize matrix%index(dbcsr_slot_nblks) = nblks matrix%index(dbcsr_slot_nze) = matrix%wms(1)%datasize CALL dbcsr_repoint_index(matrix) !$OMP SECTION CALL dbcsr_switch_data_area(matrix, matrix%wms(1)%data_area) !$OMP END SECTIONS CALL timestop(error_handle) END SUBROUTINE quick_finalize SUBROUTINE dbcsr_add_wm_from_matrix(matrix, limits) !! Creates a work matrix from the data present in a finalized matrix. TYPE(dbcsr_type), INTENT(INOUT) :: matrix !! DBCSR matrix INTEGER, DIMENSION(4), INTENT(IN), OPTIONAL :: limits !! the limits to use for copying CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_add_wm_from_matrix' INTEGER :: handle, ithread, nthreads, nwms, & old_nwms, size_used LOGICAL :: preexists ! --------------------------------------------------------------------------- CALL timeset(routineN, handle) !$OMP BARRIER preexists = ASSOCIATED(matrix%wms) IF (preexists) THEN old_nwms = SIZE(matrix%wms) IF (old_nwms .EQ. 0) & DBCSR_WARN("Nonexisting work matrices?!") ELSE old_nwms = 0 END IF nthreads = 1; ithread = 0 !$ nthreads = OMP_GET_NUM_THREADS(); ithread = OMP_GET_THREAD_NUM() IF (nthreads .GT. 1) THEN IF (old_nwms /= nthreads .AND. old_nwms /= 0) & DBCSR_ABORT("Number of work matrices and threads do not match") END IF nwms = MAX(1, old_nwms) !$ nwms = MAX(nwms, nthreads) !$OMP BARRIER !$OMP MASTER IF (.NOT. ASSOCIATED(matrix%wms)) THEN CALL dbcsr_work_create(matrix, & INT(matrix%nblks*default_resize_factor/nwms), & INT(matrix%nze*default_resize_factor/nwms), & n=nwms, work_mutable=.FALSE.) END IF !$OMP END MASTER !$OMP BARRIER size_used = matrix%nze CALL dbcsr_fill_wm_from_matrix(matrix%wms, matrix, size_used, & limits=limits) !$OMP BARRIER CALL timestop(handle) END SUBROUTINE dbcsr_add_wm_from_matrix SUBROUTINE dbcsr_fill_wm_from_matrix(wm, matrix, size_used, limits) !! Fills index and data of the work matrix from the !! previously-finalized one. !! !! limits !! The limits is a 4-tuple !! (lower_row, higher_row, lower_column, higher_column). TYPE(dbcsr_work_type), DIMENSION(:), INTENT(INOUT) :: wm !! the work matrix to fill TYPE(dbcsr_type), INTENT(INOUT) :: matrix !! DBCSR matrix INTEGER, INTENT(IN) :: size_used INTEGER, DIMENSION(4), INTENT(IN), OPTIONAL :: limits !! only fills blocks within this range CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_fill_wm_from_matrix' INTEGER :: blk, blk_p, col, handle, ithread, & nthreads, nwms, nze, row, wblk_p, & which_wm, wm_first, wm_last LOGICAL :: careful, limit, mt, tr LOGICAL, SAVE :: mutable TYPE(dbcsr_data_obj) :: data_block TYPE(dbcsr_iterator) :: iter ! --------------------------------------------------------------------------- CALL timeset(routineN, handle) nwms = SIZE(matrix%wms) mt = .FALSE. !$ IF (nwms .GT. 1) mt = omp_get_num_threads() .GT. 1 ithread = 0; nthreads = 1 !$ ithread = omp_get_thread_num() !$ nthreads = omp_get_num_threads() limit = PRESENT(limits) careful = size_used + size_used/8 & .LT. dbcsr_data_get_size_referenced(matrix%data_area) CALL dbcsr_data_init(data_block) CALL dbcsr_data_new(data_block, dbcsr_data_get_type(matrix%data_area)) IF (mt) THEN wm_first = ithread + 1 wm_last = ithread + 1 ELSE wm_first = 1 wm_last = nwms END IF ! Prepares the work matrices to accept the main data. !$OMP MASTER mutable = .FALSE. DO which_wm = 1, nwms mutable = mutable .OR. dbcsr_wm_use_mutable(wm(which_wm)) END DO !$OMP END MASTER !$OMP BARRIER DO which_wm = wm_first, wm_last IF (dbcsr_wm_use_mutable(wm(which_wm))) & DBCSR_ABORT("Adding main matrix into mutable not supported.") IF (mutable) THEN IF (.NOT. dbcsr_mutable_instantiated(wm(which_wm)%mutable)) THEN CALL dbcsr_mutable_new(wm(which_wm)%mutable, matrix%data_type) END IF ELSE ! We don't know how much data we'll get so we have to be generous. CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, & size_used/nwms) CALL dbcsr_data_set_size_referenced(wm(which_wm)%data_area, 0) END IF END DO ! Now copy the data CALL dbcsr_iterator_start(iter, matrix, shared=mt, & contiguous_pointers=.FALSE., read_only=.TRUE.) DO WHILE (dbcsr_iterator_blocks_left(iter)) CALL dbcsr_iterator_next_block(iter, row, col, data_block, & transposed=tr, block_number=blk) IF (limit) THEN IF (.NOT. within_limits(row, col, limits)) CYCLE END IF blk_p = matrix%blk_p(blk) which_wm = ithread + 1 wblk_p = SIGN(wm(which_wm)%datasize + 1, blk_p) nze = dbcsr_data_get_size(data_block) IF (mt .OR. limit .OR. careful .OR. mutable) THEN ! The data gets copied block by block so the block pointers ! are ordered accordingly. IF (.NOT. mutable) THEN CALL add_work_coordinate(wm(which_wm), row, col, wblk_p) CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, & ABS(wblk_p) + nze - 1, factor=default_resize_factor) CALL dbcsr_data_set_size_referenced(wm(which_wm)%data_area, & ABS(wblk_p) + nze - 1) CALL dbcsr_data_set(wm(which_wm)%data_area, & lb=ABS(wblk_p), & data_size=nze, & src=data_block, source_lb=1) END IF ELSE ! The data gets copied all at once so the block pointers ! should remain the same as they were. CALL add_work_coordinate(wm(which_wm), row, col, blk_p) END IF IF (.NOT. mutable) & wm(which_wm)%datasize = wm(which_wm)%datasize + nze END DO CALL dbcsr_iterator_stop(iter) CALL dbcsr_data_clear_pointer(data_block) CALL dbcsr_data_release(data_block) ! Copy all blocks at once IF (.NOT. mt .AND. .NOT. limit .AND. .NOT. careful .AND. .NOT. mutable) THEN DO which_wm = 1, nwms CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, & dbcsr_data_get_size_referenced(matrix%data_area)) CALL dbcsr_data_copyall(wm(which_wm)%data_area, matrix%data_area) wm(which_wm)%datasize = size_used END DO END IF CALL timestop(handle) END SUBROUTINE dbcsr_fill_wm_from_matrix PURE FUNCTION within_limits(row, column, limits) !! Checks whether a point is within bounds !! \return whether the point is within the bounds INTEGER, INTENT(IN) :: row, column !! point to check !! point to check INTEGER, DIMENSION(4), INTENT(IN) :: limits !! limits (low_row, high_row, low_col, high_col) LOGICAL :: within_limits within_limits = row .GE. limits(1) .AND. row .LE. limits(2) .AND. & column .GE. limits(3) .AND. column .LE. limits(4) END FUNCTION within_limits SUBROUTINE dbcsr_work_destroy(wm) !! Deallocates and destroys a work matrix. TYPE(dbcsr_work_type), INTENT(INOUT) :: wm !! work matrix CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_destroy' INTEGER :: handle ! --------------------------------------------------------------------------- IF (careful_mod) CALL timeset(routineN, handle) IF (ASSOCIATED(wm%row_i)) THEN DEALLOCATE (wm%row_i) END IF IF (ASSOCIATED(wm%col_i)) THEN DEALLOCATE (wm%col_i) END IF IF (ASSOCIATED(wm%blk_p)) THEN DEALLOCATE (wm%blk_p) END IF CALL dbcsr_data_release(wm%data_area) CALL dbcsr_mutable_destroy(wm%mutable) IF (careful_mod) CALL timestop(handle) END SUBROUTINE dbcsr_work_destroy SUBROUTINE dbcsr_work_destroy_all(m) !! Deallocates and destroys a work matrix. TYPE(dbcsr_type), INTENT(INOUT) :: m CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_destroy_all' INTEGER :: handle, i ! --------------------------------------------------------------------------- CALL timeset(routineN, handle) IF (.NOT. ASSOCIATED(m%wms)) & DBCSR_WARN("Want to destroy nonexisting work matrices.") IF (ASSOCIATED(m%wms)) THEN DO i = 1, SIZE(m%wms) CALL dbcsr_work_destroy(m%wms(i)) END DO DEALLOCATE (m%wms) END IF CALL timestop(handle) END SUBROUTINE dbcsr_work_destroy_all SUBROUTINE add_work_coordinate(matrix, row, col, blk, index) !! Adds a coordinate (or other data) into a work matrix's row_i and !! col_i arrays and returns its position. !! @note Uses the matrix%lastblk to keep track of the current position. TYPE(dbcsr_work_type), INTENT(INOUT) :: matrix !! work matrix INTEGER, INTENT(IN) :: row, col !! row data to add !! col data to add INTEGER, INTENT(IN), OPTIONAL :: blk !! block pointer to add INTEGER, INTENT(OUT), OPTIONAL :: index !! saved position CHARACTER(len=*), PARAMETER :: routineN = 'add_work_coordinate' INTEGER :: handle ! --------------------------------------------------------------------------- IF (careful_mod) CALL timeset(routineN, handle) matrix%lastblk = matrix%lastblk + 1 CALL ensure_array_size(matrix%row_i, ub=matrix%lastblk, & factor=default_resize_factor) CALL ensure_array_size(matrix%col_i, ub=matrix%lastblk, & factor=default_resize_factor) matrix%row_i(matrix%lastblk) = row matrix%col_i(matrix%lastblk) = col IF (PRESENT(blk)) THEN CALL ensure_array_size(matrix%blk_p, ub=matrix%lastblk, & factor=default_resize_factor) matrix%blk_p(matrix%lastblk) = blk END IF IF (PRESENT(index)) index = matrix%lastblk IF (careful_mod) CALL timestop(handle) END SUBROUTINE add_work_coordinate SUBROUTINE dbcsr_merge_all(matrix, old_row_p, old_col_i, old_blk_p, & sort_data) !! Merge data from matrix and work matrices into the final matrix. TYPE(dbcsr_type), INTENT(INOUT) :: matrix !! matrix to work on INTEGER, DIMENSION(*), INTENT(IN) :: old_row_p, old_col_i, old_blk_p LOGICAL, INTENT(IN) :: sort_data !! whether data will be fully sorted CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_merge_all' LOGICAL, PARAMETER :: dbg = .FALSE. INTEGER :: handle, my_row_count, nblks, & nrows, nwms, row, t, & which_wm, wm_datasize INTEGER, ALLOCATABLE, DIMENSION(:), SAVE :: all_data_offsets, all_data_sizes, & new_blk_p_sorted, new_blk_sizes, & new_row_p INTEGER, ALLOCATABLE, DIMENSION(:), SAVE, TARGET :: blk_d, new_blk_p, new_col_i INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: my_row_p INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, SAVE :: cbs, rbs INTEGER, SAVE :: max_row_count, new_nblks = 0, new_nze = 0 TYPE(dbcsr_data_obj), ALLOCATABLE, DIMENSION(:), & SAVE :: all_data_areas TYPE(dbcsr_work_type), POINTER :: wm TYPE(i_array_p), DIMENSION(:), POINTER, SAVE :: all_blk_p, all_col_i, all_row_p ! --------------------------------------------------------------------------- CALL timeset(routineN, handle) ! Outline: ! Each thread has a work matrix. These must be merged and made ! into a new index. If sort_data is False, then the data areas ! are simply appended. This is probably quicker but the data ! blocks are not in order and the data size may expand beyond ! what is needed. If sort_data is True, then data blocks are ! sorted in order. IF (dbg) WRITE (*, *) routineN//" starting, sort?", sort_data !$OMP BARRIER nrows = matrix%nblkrows_total nwms = SIZE(matrix%wms) !$ IF (nwms /= OMP_GET_NUM_THREADS()) & !$ DBCSR_ABORT("Number of threads does not match number of work matrices.") which_wm = 1 !$ which_wm = OMP_GET_THREAD_NUM() + 1 wm => matrix%wms(which_wm) ! ! Currently B-tree-based work matrices are converted to array form. IF (dbcsr_wm_use_mutable(wm)) THEN SELECT CASE (wm%mutable%m%data_type) CASE (dbcsr_type_real_4) CALL tree_to_linear_s(wm) CASE (dbcsr_type_real_8) CALL tree_to_linear_d(wm) CASE (dbcsr_type_complex_4) CALL tree_to_linear_c(wm) CASE (dbcsr_type_complex_8) CALL tree_to_linear_z(wm) CASE default DBCSR_ABORT("Invalid data type") END SELECT END IF ! ! Initializations and some counts from the threads are summed. ! !$OMP MASTER new_nblks = old_row_p(nrows + 1) new_nze = matrix%nze ALLOCATE (all_row_p(nwms)) ALLOCATE (all_col_i(nwms)) ALLOCATE (all_blk_p(nwms)) ALLOCATE (all_data_sizes(0:nwms)) ALLOCATE (all_data_offsets(nwms)) IF (sort_data) THEN ALLOCATE (all_data_areas(0:nwms)) CALL dbcsr_data_init(all_data_areas(0)) all_data_areas(0) = matrix%data_area !$OMP CRITICAL (crit_data) CALL dbcsr_data_hold(all_data_areas(0)) !$OMP END CRITICAL (crit_data) END IF ! The following is valid because the data actually referenced ! by blocks is explicitly calculated in dbcsr_finalize. all_data_sizes(0) = matrix%nze rbs => array_data(matrix%row_blk_size) cbs => array_data(matrix%col_blk_size) !$OMP END MASTER ! !$OMP BARRIER IF (sort_data .AND. wm%datasize_after_filtering .GE. 0) THEN wm_datasize = wm%datasize_after_filtering ELSE wm_datasize = wm%datasize END IF nblks = wm%lastblk !$OMP ATOMIC new_nblks = new_nblks + nblks !$OMP ATOMIC new_nze = new_nze + wm_datasize !$OMP BARRIER ! !$OMP MASTER ALLOCATE (new_row_p(nrows + 1)) ALLOCATE (new_col_i(new_nblks)) ALLOCATE (new_blk_p(new_nblks)) IF (sort_data) THEN ALLOCATE (blk_d(new_nblks)) ELSE ALLOCATE (blk_d(1)) END IF !$OMP END MASTER ! ! Each thread creates a row_p index for its new blocks. This gives the ! number of new blocks per thread per row. CALL dbcsr_sort_indices(nblks, wm%row_i, wm%col_i, wm%blk_p) ALLOCATE (my_row_p(nrows + 1)) CALL dbcsr_make_dbcsr_index(my_row_p, wm%row_i, nrows, nblks) ! ! The threads must share their row_p, col_i, blk_p, and data areas ! among themselves. all_row_p(which_wm)%p => my_row_p all_data_sizes(which_wm) = wm_datasize IF (.NOT. sort_data) THEN IF (wm_datasize > dbcsr_data_get_size_referenced(wm%data_area)) & DBCSR_ABORT("Data size mismatch.") END IF all_col_i(which_wm)%p => wm%col_i all_blk_p(which_wm)%p => wm%blk_p !$OMP BARRIER IF (sort_data) THEN !$OMP MASTER all_data_offsets(:) = 0 !$OMP END MASTER CALL dbcsr_data_init(all_data_areas(which_wm)) all_data_areas(which_wm) = wm%data_area !$OMP CRITICAL (crit_data) CALL dbcsr_data_hold(all_data_areas(which_wm)) !$OMP END CRITICAL (crit_data) ELSE !$OMP MASTER all_data_offsets(1) = all_data_sizes(0) DO t = 2, nwms all_data_offsets(t) = all_data_offsets(t - 1) + all_data_sizes(t - 1) END DO !$OMP END MASTER END IF ! ! The new row counts are created, then the new row_p index is created. ! !$OMP DO DO row = 1, nrows my_row_count = old_row_p(row + 1) - old_row_p(row) DO t = 1, nwms my_row_count = my_row_count & & + all_row_p(t)%p(row + 1) - all_row_p(t)%p(row) END DO new_row_p(row) = my_row_count END DO !$OMP END DO !$OMP MASTER max_row_count = MAXVAL(new_row_p(1:nrows)) CALL dbcsr_build_row_index(new_row_p, nrows) !$OMP END MASTER !$OMP BARRIER ! ! The new index is built CALL merge_index(new_row_p, new_col_i, new_blk_p, & blk_d, & old_row_p, old_col_i, old_blk_p, & all_row_p, all_col_i, all_blk_p, & all_data_offsets, nwms, nrows, max_row_count, sort_data) ! ! The data is then merged in one of two ways. ! !$OMP MASTER IF (sort_data) THEN ! The matrix gets a fresh data area. Blocks from the work ! matrices will be copied into it in order. ! !$OMP CRITICAL (crit_data) CALL dbcsr_data_release(matrix%data_area) CALL dbcsr_data_init(matrix%data_area) !$OMP END CRITICAL (crit_data) CALL dbcsr_data_new(matrix%data_area, & data_size=new_nze, & data_type=dbcsr_data_get_type(all_data_areas(0)), & memory_type=dbcsr_data_get_memory_type(all_data_areas(0))) ALLOCATE (new_blk_p_sorted(new_nblks)) ALLOCATE (new_blk_sizes(new_nblks)) ELSE ! Data stored in the work matrices will be just appended to the ! current data area. CALL dbcsr_data_ensure_size(matrix%data_area, new_nze) END IF !$OMP END MASTER !$OMP BARRIER IF (sort_data) THEN CALL dbcsr_calc_block_sizes(new_blk_sizes, & new_row_p, new_col_i, rbs, cbs) CALL dbcsr_sort_data(new_blk_p_sorted, new_blk_p, & new_blk_sizes, dsts=matrix%data_area, & src=all_data_areas(0), & srcs=all_data_areas, old_blk_d=blk_d) ELSE IF (all_data_sizes(which_wm) .GT. 0) THEN CALL dbcsr_data_copy(dst=matrix%data_area, & dst_lb=(/all_data_offsets(which_wm) + 1/), & dst_sizes=(/all_data_sizes(which_wm)/), & src=wm%data_area, & src_lb=(/1/), & src_sizes=(/all_data_sizes(which_wm)/)) END IF END IF ! ! Creates a new index array. ! !$OMP BARRIER !$OMP MASTER CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p) CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i) CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p) CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, & DATA=new_row_p(1:nrows + 1), extra=new_nblks*2) CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, & DATA=new_col_i(1:new_nblks)) IF (sort_data) THEN CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & DATA=new_blk_p_sorted) ELSE CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & DATA=new_blk_p) END IF matrix%nblks = new_nblks matrix%nze = new_nze matrix%index(dbcsr_slot_nblks) = matrix%nblks matrix%index(dbcsr_slot_nze) = matrix%nze CALL dbcsr_repoint_index(matrix) !$OMP END MASTER ! !$OMP BARRIER ! DEALLOCATE (my_row_p) IF (sort_data) THEN !$OMP MASTER !$OMP CRITICAL (crit_data) CALL dbcsr_data_release(all_data_areas(0)) !$OMP END CRITICAL (crit_data) DO which_wm = 1, nwms !$OMP CRITICAL (crit_data) CALL dbcsr_data_release(all_data_areas(which_wm)) !$OMP END CRITICAL (crit_data) END DO !$OMP END MASTER END IF !$OMP BARRIER !$OMP MASTER DEALLOCATE (all_row_p) DEALLOCATE (all_col_i) DEALLOCATE (all_blk_p) DEALLOCATE (new_row_p) DEALLOCATE (new_col_i) DEALLOCATE (new_blk_p) DEALLOCATE (all_data_sizes) DEALLOCATE (all_data_offsets) IF (sort_data) THEN DEALLOCATE (all_data_areas) DEALLOCATE (new_blk_sizes) DEALLOCATE (new_blk_p_sorted) END IF DEALLOCATE (blk_d) !$OMP END MASTER !$OMP BARRIER IF (dbg) WRITE (*, *) routineN//" stopped" CALL timestop(handle) END SUBROUTINE dbcsr_merge_all SUBROUTINE dbcsr_merge_single_wm(matrix) !! Sort data from the WM into the final matrix, based closely on dbcsr_merge_all TYPE(dbcsr_type), INTENT(INOUT) :: matrix !! matrix to work on CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_merge_single_wm' LOGICAL, PARAMETER :: dbg = .FALSE. INTEGER :: handle, nblks, nrows INTEGER, ALLOCATABLE, DIMENSION(:), SAVE :: new_blk_p_sorted, new_blk_sizes, & new_row_p INTEGER, ALLOCATABLE, DIMENSION(:), SAVE, TARGET :: blk_d INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, SAVE :: cbs, rbs TYPE(dbcsr_work_type), POINTER :: wm ! --------------------------------------------------------------------------- CALL timeset(routineN, handle) ! Outline: ! There is a single work matrix. Data blocks are sorted and copied ! into the matrix data area (which is empty). The index is made consistent nrows = matrix%nblkrows_total wm => matrix%wms(1) nblks = wm%lastblk IF (dbcsr_wm_use_mutable(wm)) & DBCSR_ABORT("Number of threads does not match number of work matrices.") ! ! The following is valid because the data actually referenced ! by blocks is explicitly calculated in dbcsr_finalize. !$OMP MASTER rbs => array_data(matrix%row_blk_size) cbs => array_data(matrix%col_blk_size) ! ! Initializations ! ALLOCATE (new_row_p(nrows + 1)) ALLOCATE (blk_d(nblks)) ALLOCATE (new_blk_p_sorted(nblks)) ALLOCATE (new_blk_sizes(nblks)) ! ! Master thread creates a row_p index for the (sorted) blocks. CALL dbcsr_sort_indices(nblks, wm%row_i, wm%col_i, wm%blk_p) CALL dbcsr_make_dbcsr_index(new_row_p, wm%row_i, nrows, nblks) ! ! ! The matrix data area is resized. Blocks from the work ! matrices will be copied into it in order. ! CALL dbcsr_data_ensure_size(matrix%data_area, wm%datasize, nocopy=.TRUE.) !$OMP END MASTER !$OMP BARRIER CALL dbcsr_calc_block_sizes(new_blk_sizes, & new_row_p, wm%col_i, rbs, cbs) CALL dbcsr_sort_data(new_blk_p_sorted, wm%blk_p, & new_blk_sizes, dsts=matrix%data_area, & src=wm%data_area) ! ! Creates a new index array. ! !$OMP BARRIER !$OMP MASTER CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p) CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i) CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p) CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, & DATA=new_row_p(1:nrows + 1), extra=nblks*2) CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, & DATA=wm%col_i(1:nblks)) CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & DATA=new_blk_p_sorted) matrix%nblks = nblks matrix%nze = wm%datasize matrix%index(dbcsr_slot_nblks) = matrix%nblks matrix%index(dbcsr_slot_nze) = matrix%nze CALL dbcsr_repoint_index(matrix) DEALLOCATE (new_row_p) DEALLOCATE (new_blk_sizes) DEALLOCATE (new_blk_p_sorted) DEALLOCATE (blk_d) !$OMP END MASTER IF (dbg) WRITE (*, *) routineN//" stopped" CALL timestop(handle) END SUBROUTINE dbcsr_merge_single_wm SUBROUTINE merge_index(new_row_p, new_col_i, new_blk_p, & !! Builds a new index from several work matrices. blk_d, old_row_p, old_col_i, old_blk_p, & all_row_p, all_col_i, all_blk_p, & all_data_offsets, nwms, nrows, max_row_count, sort_data) INTEGER, DIMENSION(*), INTENT(IN) :: new_row_p INTEGER, DIMENSION(*), INTENT(OUT), TARGET :: new_col_i, new_blk_p INTEGER, DIMENSION(*), INTENT(IN), TARGET :: blk_d INTEGER, DIMENSION(*), INTENT(IN) :: old_row_p, old_col_i, old_blk_p TYPE(i_array_p), DIMENSION(*), INTENT(IN) :: all_row_p, all_col_i, all_blk_p INTEGER, DIMENSION(*), INTENT(IN) :: all_data_offsets INTEGER, INTENT(IN) :: nwms, nrows, max_row_count LOGICAL, INTENT(IN) :: sort_data CHARACTER(len=*), PARAMETER :: routineN = 'merge_index' INTEGER :: blk1, blk2, error_handle, first_row_blk, & last_row_blk, row, src_blk_1, & src_blk_2, t INTEGER, ALLOCATABLE, DIMENSION(:) :: blk_p_buff, tmp_arr INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: blk_span, col_span, d_span ! --------------------------------------------------------------------------- CALL timeset(routineN, error_handle) ! ALLOCATE (tmp_arr(max_row_count)) ALLOCATE (blk_p_buff(max_row_count)) ! !$OMP DO DO row = 1, nrows first_row_blk = new_row_p(row) + 1 last_row_blk = new_row_p(row + 1) col_span => new_col_i(first_row_blk:last_row_blk) blk_span => new_blk_p(first_row_blk:last_row_blk) IF (sort_data) d_span => blk_d(first_row_blk:last_row_blk) src_blk_1 = old_row_p(row) + 1 src_blk_2 = old_row_p(row + 1) blk1 = 1 blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1 col_span(blk1:blk2) = old_col_i(src_blk_1:src_blk_2) blk_span(blk1:blk2) = old_blk_p(src_blk_1:src_blk_2) IF (sort_data) THEN d_span(blk1:blk2) = 1 DO t = 1, nwms src_blk_1 = all_row_p(t)%p(row) + 1 src_blk_2 = all_row_p(t)%p(row + 1) blk1 = blk2 + 1 blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1 col_span(blk1:blk2) = all_col_i(t)%p(src_blk_1:src_blk_2) blk_span(blk1:blk2) = all_blk_p(t)%p(src_blk_1:src_blk_2) d_span(blk1:blk2) = t + 1 END DO ELSE DO t = 1, nwms src_blk_1 = all_row_p(t)%p(row) + 1 src_blk_2 = all_row_p(t)%p(row + 1) blk1 = blk2 + 1 blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1 col_span(blk1:blk2) = all_col_i(t)%p(src_blk_1:src_blk_2) blk_span(blk1:blk2) = all_blk_p(t)%p(src_blk_1:src_blk_2) & & + all_data_offsets(t) END DO END IF CALL sort(col_span, SIZE(col_span), tmp_arr) blk_p_buff(1:SIZE(blk_span)) = blk_span(:) blk_span(1:SIZE(blk_span)) = blk_p_buff(tmp_arr(1:SIZE(blk_span))) END DO !$OMP END DO ! DEALLOCATE (tmp_arr) DEALLOCATE (blk_p_buff) ! CALL timestop(error_handle) END SUBROUTINE merge_index # 1 "/__w/dbcsr/dbcsr/src/work/../data/dbcsr.fypp" 1 # 9 "/__w/dbcsr/dbcsr/src/work/../data/dbcsr.fypp" # 11 "/__w/dbcsr/dbcsr/src/work/../data/dbcsr.fypp" # 169 "/__w/dbcsr/dbcsr/src/work/../data/dbcsr.fypp" # 1835 "/__w/dbcsr/dbcsr/src/work/dbcsr_work_operations.F" 2 # 1836 "/__w/dbcsr/dbcsr/src/work/dbcsr_work_operations.F" SUBROUTINE tree_to_linear_d (wm) !! Converts mutable data to linear (array) type. USE dbcsr_btree, & ONLY: btree_2d_data_d => btree_data_dp2d, & btree_destroy_d => btree_delete, & btree_size_d => btree_get_entries TYPE(dbcsr_work_type), INTENT(INOUT) :: wm !! work matrix to convert CHARACTER(len=*), PARAMETER :: routineN = 'tree_to_linear_d' INTEGER :: blk, blk_p, treesize, & error_handler, needed_size INTEGER(KIND=int_8), ALLOCATABLE, & DIMENSION(:) :: keys REAL(kind=real_8), DIMENSION(:), POINTER, CONTIGUOUS :: target_data REAL(kind=real_8), DIMENSION(:, :), POINTER :: block_2d TYPE(btree_2d_data_d), ALLOCATABLE, & DIMENSION(:) :: values ! --------------------------------------------------------------------------- CALL timeset(routineN, error_handler) ! srt = .TRUE. ! Not needed because of the copy treesize = btree_size_d (wm%mutable%m%btree_d) IF (wm%lastblk .NE. treesize) & DBCSR_ABORT("Mismatch in number of blocks") ALLOCATE (keys(treesize), values(treesize)) CALL btree_destroy_d (wm%mutable%m%btree_d, keys, values) CALL ensure_array_size(wm%row_i, ub=treesize) CALL ensure_array_size(wm%col_i, ub=treesize) CALL dbcsr_unpack_i8_2i4(keys, wm%row_i, & wm%col_i) ! For now we also fill the data, sloooowly, but this should ! be avoided and the data should be copied directly from the ! source in the subroutine's main loop. CALL ensure_array_size(wm%blk_p, ub=treesize) needed_size = 0 DO blk = 1, treesize block_2d => values(blk)%p needed_size = needed_size + SIZE(block_2d) END DO wm%datasize = needed_size CALL dbcsr_data_ensure_size(wm%data_area, & wm%datasize) target_data => dbcsr_get_data_p_d (wm%data_area) blk_p = 1 DO blk = 1, treesize block_2d => values(blk)%p IF (.NOT. values(blk)%tr) THEN wm%blk_p(blk) = blk_p ELSE wm%blk_p(blk) = -blk_p END IF CALL block_copy_d (target_data, block_2d, & SIZE(block_2d), blk_p, 1) blk_p = blk_p + SIZE(block_2d) DEALLOCATE (block_2d) END DO DEALLOCATE (keys, values) CALL dbcsr_mutable_release(wm%mutable) CALL timestop(error_handler) END SUBROUTINE tree_to_linear_d # 1836 "/__w/dbcsr/dbcsr/src/work/dbcsr_work_operations.F" SUBROUTINE tree_to_linear_s (wm) !! Converts mutable data to linear (array) type. USE dbcsr_btree, & ONLY: btree_2d_data_s => btree_data_sp2d, & btree_destroy_s => btree_delete, & btree_size_s => btree_get_entries TYPE(dbcsr_work_type), INTENT(INOUT) :: wm !! work matrix to convert CHARACTER(len=*), PARAMETER :: routineN = 'tree_to_linear_s' INTEGER :: blk, blk_p, treesize, & error_handler, needed_size INTEGER(KIND=int_8), ALLOCATABLE, & DIMENSION(:) :: keys REAL(kind=real_4), DIMENSION(:), POINTER, CONTIGUOUS :: target_data REAL(kind=real_4), DIMENSION(:, :), POINTER :: block_2d TYPE(btree_2d_data_s), ALLOCATABLE, & DIMENSION(:) :: values ! --------------------------------------------------------------------------- CALL timeset(routineN, error_handler) ! srt = .TRUE. ! Not needed because of the copy treesize = btree_size_s (wm%mutable%m%btree_s) IF (wm%lastblk .NE. treesize) & DBCSR_ABORT("Mismatch in number of blocks") ALLOCATE (keys(treesize), values(treesize)) CALL btree_destroy_s (wm%mutable%m%btree_s, keys, values) CALL ensure_array_size(wm%row_i, ub=treesize) CALL ensure_array_size(wm%col_i, ub=treesize) CALL dbcsr_unpack_i8_2i4(keys, wm%row_i, & wm%col_i) ! For now we also fill the data, sloooowly, but this should ! be avoided and the data should be copied directly from the ! source in the subroutine's main loop. CALL ensure_array_size(wm%blk_p, ub=treesize) needed_size = 0 DO blk = 1, treesize block_2d => values(blk)%p needed_size = needed_size + SIZE(block_2d) END DO wm%datasize = needed_size CALL dbcsr_data_ensure_size(wm%data_area, & wm%datasize) target_data => dbcsr_get_data_p_s (wm%data_area) blk_p = 1 DO blk = 1, treesize block_2d => values(blk)%p IF (.NOT. values(blk)%tr) THEN wm%blk_p(blk) = blk_p ELSE wm%blk_p(blk) = -blk_p END IF CALL block_copy_s (target_data, block_2d, & SIZE(block_2d), blk_p, 1) blk_p = blk_p + SIZE(block_2d) DEALLOCATE (block_2d) END DO DEALLOCATE (keys, values) CALL dbcsr_mutable_release(wm%mutable) CALL timestop(error_handler) END SUBROUTINE tree_to_linear_s # 1836 "/__w/dbcsr/dbcsr/src/work/dbcsr_work_operations.F" SUBROUTINE tree_to_linear_z (wm) !! Converts mutable data to linear (array) type. USE dbcsr_btree, & ONLY: btree_2d_data_z => btree_data_zp2d, & btree_destroy_z => btree_delete, & btree_size_z => btree_get_entries TYPE(dbcsr_work_type), INTENT(INOUT) :: wm !! work matrix to convert CHARACTER(len=*), PARAMETER :: routineN = 'tree_to_linear_z' INTEGER :: blk, blk_p, treesize, & error_handler, needed_size INTEGER(KIND=int_8), ALLOCATABLE, & DIMENSION(:) :: keys COMPLEX(kind=real_8), DIMENSION(:), POINTER, CONTIGUOUS :: target_data COMPLEX(kind=real_8), DIMENSION(:, :), POINTER :: block_2d TYPE(btree_2d_data_z), ALLOCATABLE, & DIMENSION(:) :: values ! --------------------------------------------------------------------------- CALL timeset(routineN, error_handler) ! srt = .TRUE. ! Not needed because of the copy treesize = btree_size_z (wm%mutable%m%btree_z) IF (wm%lastblk .NE. treesize) & DBCSR_ABORT("Mismatch in number of blocks") ALLOCATE (keys(treesize), values(treesize)) CALL btree_destroy_z (wm%mutable%m%btree_z, keys, values) CALL ensure_array_size(wm%row_i, ub=treesize) CALL ensure_array_size(wm%col_i, ub=treesize) CALL dbcsr_unpack_i8_2i4(keys, wm%row_i, & wm%col_i) ! For now we also fill the data, sloooowly, but this should ! be avoided and the data should be copied directly from the ! source in the subroutine's main loop. CALL ensure_array_size(wm%blk_p, ub=treesize) needed_size = 0 DO blk = 1, treesize block_2d => values(blk)%p needed_size = needed_size + SIZE(block_2d) END DO wm%datasize = needed_size CALL dbcsr_data_ensure_size(wm%data_area, & wm%datasize) target_data => dbcsr_get_data_p_z (wm%data_area) blk_p = 1 DO blk = 1, treesize block_2d => values(blk)%p IF (.NOT. values(blk)%tr) THEN wm%blk_p(blk) = blk_p ELSE wm%blk_p(blk) = -blk_p END IF CALL block_copy_z (target_data, block_2d, & SIZE(block_2d), blk_p, 1) blk_p = blk_p + SIZE(block_2d) DEALLOCATE (block_2d) END DO DEALLOCATE (keys, values) CALL dbcsr_mutable_release(wm%mutable) CALL timestop(error_handler) END SUBROUTINE tree_to_linear_z # 1836 "/__w/dbcsr/dbcsr/src/work/dbcsr_work_operations.F" SUBROUTINE tree_to_linear_c (wm) !! Converts mutable data to linear (array) type. USE dbcsr_btree, & ONLY: btree_2d_data_c => btree_data_cp2d, & btree_destroy_c => btree_delete, & btree_size_c => btree_get_entries TYPE(dbcsr_work_type), INTENT(INOUT) :: wm !! work matrix to convert CHARACTER(len=*), PARAMETER :: routineN = 'tree_to_linear_c' INTEGER :: blk, blk_p, treesize, & error_handler, needed_size INTEGER(KIND=int_8), ALLOCATABLE, & DIMENSION(:) :: keys COMPLEX(kind=real_4), DIMENSION(:), POINTER, CONTIGUOUS :: target_data COMPLEX(kind=real_4), DIMENSION(:, :), POINTER :: block_2d TYPE(btree_2d_data_c), ALLOCATABLE, & DIMENSION(:) :: values ! --------------------------------------------------------------------------- CALL timeset(routineN, error_handler) ! srt = .TRUE. ! Not needed because of the copy treesize = btree_size_c (wm%mutable%m%btree_c) IF (wm%lastblk .NE. treesize) & DBCSR_ABORT("Mismatch in number of blocks") ALLOCATE (keys(treesize), values(treesize)) CALL btree_destroy_c (wm%mutable%m%btree_c, keys, values) CALL ensure_array_size(wm%row_i, ub=treesize) CALL ensure_array_size(wm%col_i, ub=treesize) CALL dbcsr_unpack_i8_2i4(keys, wm%row_i, & wm%col_i) ! For now we also fill the data, sloooowly, but this should ! be avoided and the data should be copied directly from the ! source in the subroutine's main loop. CALL ensure_array_size(wm%blk_p, ub=treesize) needed_size = 0 DO blk = 1, treesize block_2d => values(blk)%p needed_size = needed_size + SIZE(block_2d) END DO wm%datasize = needed_size CALL dbcsr_data_ensure_size(wm%data_area, & wm%datasize) target_data => dbcsr_get_data_p_c (wm%data_area) blk_p = 1 DO blk = 1, treesize block_2d => values(blk)%p IF (.NOT. values(blk)%tr) THEN wm%blk_p(blk) = blk_p ELSE wm%blk_p(blk) = -blk_p END IF CALL block_copy_c (target_data, block_2d, & SIZE(block_2d), blk_p, 1) blk_p = blk_p + SIZE(block_2d) DEALLOCATE (block_2d) END DO DEALLOCATE (keys, values) CALL dbcsr_mutable_release(wm%mutable) CALL timestop(error_handler) END SUBROUTINE tree_to_linear_c # 1902 "/__w/dbcsr/dbcsr/src/work/dbcsr_work_operations.F" END MODULE dbcsr_work_operations