Merge data from matrix and work matrices into the final matrix.
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(dbcsr_type), | intent(inout) | :: | matrix |
matrix to work on |
||
integer, | intent(in), | DIMENSION(*) | :: | old_row_p | ||
integer, | intent(in), | DIMENSION(*) | :: | old_col_i | ||
integer, | intent(in), | DIMENSION(*) | :: | old_blk_p | ||
logical, | intent(in) | :: | sort_data |
whether data will be fully sorted |
SUBROUTINE dbcsr_merge_all(matrix, old_row_p, old_col_i, old_blk_p, & sort_data) !! Merge data from matrix and work matrices into the final matrix. TYPE(dbcsr_type), INTENT(INOUT) :: matrix !! matrix to work on INTEGER, DIMENSION(*), INTENT(IN) :: old_row_p, old_col_i, old_blk_p LOGICAL, INTENT(IN) :: sort_data !! whether data will be fully sorted CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_merge_all' LOGICAL, PARAMETER :: dbg = .FALSE. INTEGER :: handle, my_row_count, nblks, & nrows, nwms, row, t, & which_wm, wm_datasize INTEGER, ALLOCATABLE, DIMENSION(:), SAVE :: all_data_offsets, all_data_sizes, & new_blk_p_sorted, new_blk_sizes, & new_row_p INTEGER, ALLOCATABLE, DIMENSION(:), SAVE, TARGET :: blk_d, new_blk_p, new_col_i INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: my_row_p INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, SAVE :: cbs, rbs INTEGER, SAVE :: max_row_count, new_nblks = 0, new_nze = 0 TYPE(dbcsr_data_obj), ALLOCATABLE, DIMENSION(:), & SAVE :: all_data_areas TYPE(dbcsr_work_type), POINTER :: wm TYPE(i_array_p), DIMENSION(:), POINTER, SAVE :: all_blk_p, all_col_i, all_row_p ! --------------------------------------------------------------------------- CALL timeset(routineN, handle) ! Outline: ! Each thread has a work matrix. These must be merged and made ! into a new index. If sort_data is False, then the data areas ! are simply appended. This is probably quicker but the data ! blocks are not in order and the data size may expand beyond ! what is needed. If sort_data is True, then data blocks are ! sorted in order. IF (dbg) WRITE (*, *) routineN//" starting, sort?", sort_data !$OMP BARRIER nrows = matrix%nblkrows_total nwms = SIZE(matrix%wms) !$ IF (nwms /= OMP_GET_NUM_THREADS()) & !$ DBCSR_ABORT("Number of threads does not match number of work matrices.") which_wm = 1 !$ which_wm = OMP_GET_THREAD_NUM() + 1 wm => matrix%wms(which_wm) ! ! Currently B-tree-based work matrices are converted to array form. IF (dbcsr_wm_use_mutable(wm)) THEN SELECT CASE (wm%mutable%m%data_type) CASE (dbcsr_type_real_4) CALL tree_to_linear_s(wm) CASE (dbcsr_type_real_8) CALL tree_to_linear_d(wm) CASE (dbcsr_type_complex_4) CALL tree_to_linear_c(wm) CASE (dbcsr_type_complex_8) CALL tree_to_linear_z(wm) CASE default DBCSR_ABORT("Invalid data type") END SELECT END IF ! ! Initializations and some counts from the threads are summed. ! !$OMP MASTER new_nblks = old_row_p(nrows + 1) new_nze = matrix%nze ALLOCATE (all_row_p(nwms)) ALLOCATE (all_col_i(nwms)) ALLOCATE (all_blk_p(nwms)) ALLOCATE (all_data_sizes(0:nwms)) ALLOCATE (all_data_offsets(nwms)) IF (sort_data) THEN ALLOCATE (all_data_areas(0:nwms)) CALL dbcsr_data_init(all_data_areas(0)) all_data_areas(0) = matrix%data_area !$OMP CRITICAL (crit_data) CALL dbcsr_data_hold(all_data_areas(0)) !$OMP END CRITICAL (crit_data) END IF ! The following is valid because the data actually referenced ! by blocks is explicitly calculated in dbcsr_finalize. all_data_sizes(0) = matrix%nze rbs => array_data(matrix%row_blk_size) cbs => array_data(matrix%col_blk_size) !$OMP END MASTER ! !$OMP BARRIER IF (sort_data .AND. wm%datasize_after_filtering .GE. 0) THEN wm_datasize = wm%datasize_after_filtering ELSE wm_datasize = wm%datasize END IF nblks = wm%lastblk !$OMP ATOMIC new_nblks = new_nblks + nblks !$OMP ATOMIC new_nze = new_nze + wm_datasize !$OMP BARRIER ! !$OMP MASTER ALLOCATE (new_row_p(nrows + 1)) ALLOCATE (new_col_i(new_nblks)) ALLOCATE (new_blk_p(new_nblks)) IF (sort_data) THEN ALLOCATE (blk_d(new_nblks)) ELSE ALLOCATE (blk_d(1)) END IF !$OMP END MASTER ! ! Each thread creates a row_p index for its new blocks. This gives the ! number of new blocks per thread per row. CALL dbcsr_sort_indices(nblks, wm%row_i, wm%col_i, wm%blk_p) ALLOCATE (my_row_p(nrows + 1)) CALL dbcsr_make_dbcsr_index(my_row_p, wm%row_i, nrows, nblks) ! ! The threads must share their row_p, col_i, blk_p, and data areas ! among themselves. all_row_p(which_wm)%p => my_row_p all_data_sizes(which_wm) = wm_datasize IF (.NOT. sort_data) THEN IF (wm_datasize > dbcsr_data_get_size_referenced(wm%data_area)) & DBCSR_ABORT("Data size mismatch.") END IF all_col_i(which_wm)%p => wm%col_i all_blk_p(which_wm)%p => wm%blk_p !$OMP BARRIER IF (sort_data) THEN !$OMP MASTER all_data_offsets(:) = 0 !$OMP END MASTER CALL dbcsr_data_init(all_data_areas(which_wm)) all_data_areas(which_wm) = wm%data_area !$OMP CRITICAL (crit_data) CALL dbcsr_data_hold(all_data_areas(which_wm)) !$OMP END CRITICAL (crit_data) ELSE !$OMP MASTER all_data_offsets(1) = all_data_sizes(0) DO t = 2, nwms all_data_offsets(t) = all_data_offsets(t - 1) + all_data_sizes(t - 1) END DO !$OMP END MASTER END IF ! ! The new row counts are created, then the new row_p index is created. ! !$OMP DO DO row = 1, nrows my_row_count = old_row_p(row + 1) - old_row_p(row) DO t = 1, nwms my_row_count = my_row_count & & + all_row_p(t)%p(row + 1) - all_row_p(t)%p(row) END DO new_row_p(row) = my_row_count END DO !$OMP END DO !$OMP MASTER max_row_count = MAXVAL(new_row_p(1:nrows)) CALL dbcsr_build_row_index(new_row_p, nrows) !$OMP END MASTER !$OMP BARRIER ! ! The new index is built CALL merge_index(new_row_p, new_col_i, new_blk_p, & blk_d, & old_row_p, old_col_i, old_blk_p, & all_row_p, all_col_i, all_blk_p, & all_data_offsets, nwms, nrows, max_row_count, sort_data) ! ! The data is then merged in one of two ways. ! !$OMP MASTER IF (sort_data) THEN ! The matrix gets a fresh data area. Blocks from the work ! matrices will be copied into it in order. ! !$OMP CRITICAL (crit_data) CALL dbcsr_data_release(matrix%data_area) CALL dbcsr_data_init(matrix%data_area) !$OMP END CRITICAL (crit_data) CALL dbcsr_data_new(matrix%data_area, & data_size=new_nze, & data_type=dbcsr_data_get_type(all_data_areas(0)), & memory_type=dbcsr_data_get_memory_type(all_data_areas(0))) ALLOCATE (new_blk_p_sorted(new_nblks)) ALLOCATE (new_blk_sizes(new_nblks)) ELSE ! Data stored in the work matrices will be just appended to the ! current data area. CALL dbcsr_data_ensure_size(matrix%data_area, new_nze) END IF !$OMP END MASTER !$OMP BARRIER IF (sort_data) THEN CALL dbcsr_calc_block_sizes(new_blk_sizes, & new_row_p, new_col_i, rbs, cbs) CALL dbcsr_sort_data(new_blk_p_sorted, new_blk_p, & new_blk_sizes, dsts=matrix%data_area, & src=all_data_areas(0), & srcs=all_data_areas, old_blk_d=blk_d) ELSE IF (all_data_sizes(which_wm) .GT. 0) THEN CALL dbcsr_data_copy(dst=matrix%data_area, & dst_lb=(/all_data_offsets(which_wm) + 1/), & dst_sizes=(/all_data_sizes(which_wm)/), & src=wm%data_area, & src_lb=(/1/), & src_sizes=(/all_data_sizes(which_wm)/)) END IF END IF ! ! Creates a new index array. ! !$OMP BARRIER !$OMP MASTER CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p) CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i) CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p) CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, & DATA=new_row_p(1:nrows + 1), extra=new_nblks*2) CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, & DATA=new_col_i(1:new_nblks)) IF (sort_data) THEN CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & DATA=new_blk_p_sorted) ELSE CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & DATA=new_blk_p) END IF matrix%nblks = new_nblks matrix%nze = new_nze matrix%index(dbcsr_slot_nblks) = matrix%nblks matrix%index(dbcsr_slot_nze) = matrix%nze CALL dbcsr_repoint_index(matrix) !$OMP END MASTER ! !$OMP BARRIER ! DEALLOCATE (my_row_p) IF (sort_data) THEN !$OMP MASTER !$OMP CRITICAL (crit_data) CALL dbcsr_data_release(all_data_areas(0)) !$OMP END CRITICAL (crit_data) DO which_wm = 1, nwms !$OMP CRITICAL (crit_data) CALL dbcsr_data_release(all_data_areas(which_wm)) !$OMP END CRITICAL (crit_data) END DO !$OMP END MASTER END IF !$OMP BARRIER !$OMP MASTER DEALLOCATE (all_row_p) DEALLOCATE (all_col_i) DEALLOCATE (all_blk_p) DEALLOCATE (new_row_p) DEALLOCATE (new_col_i) DEALLOCATE (new_blk_p) DEALLOCATE (all_data_sizes) DEALLOCATE (all_data_offsets) IF (sort_data) THEN DEALLOCATE (all_data_areas) DEALLOCATE (new_blk_sizes) DEALLOCATE (new_blk_p_sorted) END IF DEALLOCATE (blk_d) !$OMP END MASTER !$OMP BARRIER IF (dbg) WRITE (*, *) routineN//" stopped" CALL timestop(handle) END SUBROUTINE dbcsr_merge_all