Merge data from matrix and work matrices into the final matrix.
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(dbcsr_type), | intent(inout) | :: | matrix |
matrix to work on |
||
integer, | intent(in), | DIMENSION(*) | :: | old_row_p | ||
integer, | intent(in), | DIMENSION(*) | :: | old_col_i | ||
integer, | intent(in), | DIMENSION(*) | :: | old_blk_p | ||
logical, | intent(in) | :: | sort_data |
whether data will be fully sorted |
SUBROUTINE dbcsr_merge_all(matrix, old_row_p, old_col_i, old_blk_p, &
sort_data)
!! Merge data from matrix and work matrices into the final matrix.
TYPE(dbcsr_type), INTENT(INOUT) :: matrix
!! matrix to work on
INTEGER, DIMENSION(*), INTENT(IN) :: old_row_p, old_col_i, old_blk_p
LOGICAL, INTENT(IN) :: sort_data
!! whether data will be fully sorted
CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_merge_all'
LOGICAL, PARAMETER :: dbg = .FALSE.
INTEGER :: handle, my_row_count, nblks, &
nrows, nwms, row, t, &
which_wm, wm_datasize
INTEGER, ALLOCATABLE, DIMENSION(:), SAVE :: all_data_offsets, all_data_sizes, &
new_blk_p_sorted, new_blk_sizes, &
new_row_p
INTEGER, ALLOCATABLE, DIMENSION(:), SAVE, TARGET :: blk_d, new_blk_p, new_col_i
INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: my_row_p
INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, SAVE :: cbs, rbs
INTEGER, SAVE :: max_row_count, new_nblks = 0, new_nze = 0
TYPE(dbcsr_data_obj), ALLOCATABLE, DIMENSION(:), &
SAVE :: all_data_areas
TYPE(dbcsr_work_type), POINTER :: wm
TYPE(i_array_p), DIMENSION(:), POINTER, SAVE :: all_blk_p, all_col_i, all_row_p
! ---------------------------------------------------------------------------
CALL timeset(routineN, handle)
! Outline:
! Each thread has a work matrix. These must be merged and made
! into a new index. If sort_data is False, then the data areas
! are simply appended. This is probably quicker but the data
! blocks are not in order and the data size may expand beyond
! what is needed. If sort_data is True, then data blocks are
! sorted in order.
IF (dbg) WRITE (*, *) routineN//" starting, sort?", sort_data
!$OMP BARRIER
nrows = matrix%nblkrows_total
nwms = SIZE(matrix%wms)
!$ IF (nwms /= OMP_GET_NUM_THREADS()) &
!$ DBCSR_ABORT("Number of threads does not match number of work matrices.")
which_wm = 1
!$ which_wm = OMP_GET_THREAD_NUM() + 1
wm => matrix%wms(which_wm)
!
! Currently B-tree-based work matrices are converted to array form.
IF (dbcsr_wm_use_mutable(wm)) THEN
SELECT CASE (wm%mutable%m%data_type)
CASE (dbcsr_type_real_4)
CALL tree_to_linear_s(wm)
CASE (dbcsr_type_real_8)
CALL tree_to_linear_d(wm)
CASE (dbcsr_type_complex_4)
CALL tree_to_linear_c(wm)
CASE (dbcsr_type_complex_8)
CALL tree_to_linear_z(wm)
CASE default
DBCSR_ABORT("Invalid data type")
END SELECT
END IF
!
! Initializations and some counts from the threads are summed.
!
!$OMP MASTER
new_nblks = old_row_p(nrows + 1)
new_nze = matrix%nze
ALLOCATE (all_row_p(nwms))
ALLOCATE (all_col_i(nwms))
ALLOCATE (all_blk_p(nwms))
ALLOCATE (all_data_sizes(0:nwms))
ALLOCATE (all_data_offsets(nwms))
IF (sort_data) ALLOCATE (all_data_areas(0:nwms))
IF (sort_data) THEN
CALL dbcsr_data_init(all_data_areas(0))
all_data_areas(0) = matrix%data_area
!$OMP CRITICAL (crit_data)
CALL dbcsr_data_hold(all_data_areas(0))
!$OMP END CRITICAL (crit_data)
END IF
! The following is valid because the data actually referenced
! by blocks is explicitly calculated in dbcsr_finalize.
all_data_sizes(0) = matrix%nze
rbs => array_data(matrix%row_blk_size)
cbs => array_data(matrix%col_blk_size)
!$OMP END MASTER
!
!$OMP BARRIER
IF (sort_data .AND. wm%datasize_after_filtering .GE. 0) THEN
wm_datasize = wm%datasize_after_filtering
ELSE
wm_datasize = wm%datasize
END IF
nblks = wm%lastblk
!$OMP ATOMIC
new_nblks = new_nblks + nblks
!$OMP ATOMIC
new_nze = new_nze + wm_datasize
!$OMP BARRIER
!
!$OMP MASTER
ALLOCATE (new_row_p(nrows + 1))
ALLOCATE (new_col_i(new_nblks))
ALLOCATE (new_blk_p(new_nblks))
IF (sort_data) THEN
ALLOCATE (blk_d(new_nblks))
ELSE
ALLOCATE (blk_d(1))
END IF
!$OMP END MASTER
!
! Each thread creates a row_p index for its new blocks. This gives the
! number of new blocks per thread per row.
CALL dbcsr_sort_indices(nblks, wm%row_i, wm%col_i, wm%blk_p)
ALLOCATE (my_row_p(nrows + 1))
CALL dbcsr_make_dbcsr_index(my_row_p, wm%row_i, nrows, nblks)
!
! The threads must share their row_p, col_i, blk_p, and data areas
! among themselves.
all_row_p(which_wm)%p => my_row_p
all_data_sizes(which_wm) = wm_datasize
IF (.NOT. sort_data) THEN
IF (wm_datasize > dbcsr_data_get_size_referenced(wm%data_area)) &
DBCSR_ABORT("Data size mismatch.")
END IF
all_col_i(which_wm)%p => wm%col_i
all_blk_p(which_wm)%p => wm%blk_p
!$OMP BARRIER
IF (sort_data) THEN
!$OMP MASTER
all_data_offsets(:) = 0
!$OMP END MASTER
CALL dbcsr_data_init(all_data_areas(which_wm))
all_data_areas(which_wm) = wm%data_area
!$OMP CRITICAL (crit_data)
CALL dbcsr_data_hold(all_data_areas(which_wm))
!$OMP END CRITICAL (crit_data)
ELSE
!$OMP MASTER
all_data_offsets(1) = all_data_sizes(0)
DO t = 2, nwms
all_data_offsets(t) = all_data_offsets(t - 1) + all_data_sizes(t - 1)
END DO
!$OMP END MASTER
END IF
!
! The new row counts are created, then the new row_p index is created.
!
!$OMP DO
DO row = 1, nrows
my_row_count = old_row_p(row + 1) - old_row_p(row)
DO t = 1, nwms
my_row_count = my_row_count &
& + all_row_p(t)%p(row + 1) - all_row_p(t)%p(row)
END DO
new_row_p(row) = my_row_count
END DO
!$OMP END DO
!$OMP MASTER
max_row_count = MAXVAL(new_row_p(1:nrows))
CALL dbcsr_build_row_index(new_row_p, nrows)
!$OMP END MASTER
!$OMP BARRIER
!
! The new index is built
CALL merge_index(new_row_p, new_col_i, new_blk_p, &
blk_d, &
old_row_p, old_col_i, old_blk_p, &
all_row_p, all_col_i, all_blk_p, &
all_data_offsets, nwms, nrows, max_row_count, sort_data)
!
! The data is then merged in one of two ways.
!
!$OMP MASTER
IF (sort_data) THEN
! The matrix gets a fresh data area. Blocks from the work
! matrices will be copied into it in order.
!
!$OMP CRITICAL (crit_data)
CALL dbcsr_data_release(matrix%data_area)
CALL dbcsr_data_init(matrix%data_area)
!$OMP END CRITICAL (crit_data)
CALL dbcsr_data_new(matrix%data_area, &
data_size=new_nze, &
data_type=dbcsr_data_get_type(all_data_areas(0)), &
memory_type=dbcsr_data_get_memory_type(all_data_areas(0)))
ALLOCATE (new_blk_p_sorted(new_nblks))
ALLOCATE (new_blk_sizes(new_nblks))
ELSE
! Data stored in the work matrices will be just appended to the
! current data area.
CALL dbcsr_data_ensure_size(matrix%data_area, new_nze)
END IF
!$OMP END MASTER
!$OMP BARRIER
IF (sort_data) THEN
CALL dbcsr_calc_block_sizes(new_blk_sizes, &
new_row_p, new_col_i, rbs, cbs)
CALL dbcsr_sort_data(new_blk_p_sorted, new_blk_p, &
new_blk_sizes, dsts=matrix%data_area, &
src=all_data_areas(0), &
srcs=all_data_areas, old_blk_d=blk_d)
ELSE
IF (all_data_sizes(which_wm) .GT. 0) THEN
CALL dbcsr_data_copy(dst=matrix%data_area, &
dst_lb=(/all_data_offsets(which_wm) + 1/), &
dst_sizes=(/all_data_sizes(which_wm)/), &
src=wm%data_area, &
src_lb=(/1/), &
src_sizes=(/all_data_sizes(which_wm)/))
END IF
END IF
!
! Creates a new index array.
!
!$OMP BARRIER
!$OMP MASTER
CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p)
CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i)
CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p)
CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, &
DATA=new_row_p(1:nrows + 1), extra=new_nblks*2)
CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, &
DATA=new_col_i(1:new_nblks))
IF (sort_data) THEN
CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, &
DATA=new_blk_p_sorted)
ELSE
CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, &
DATA=new_blk_p)
END IF
matrix%nblks = new_nblks
matrix%nze = new_nze
matrix%index(dbcsr_slot_nblks) = matrix%nblks
matrix%index(dbcsr_slot_nze) = matrix%nze
CALL dbcsr_repoint_index(matrix)
!$OMP END MASTER
!
!$OMP BARRIER
!
DEALLOCATE (my_row_p)
IF (sort_data) THEN
!$OMP MASTER
!$OMP CRITICAL (crit_data)
CALL dbcsr_data_release(all_data_areas(0))
!$OMP END CRITICAL (crit_data)
DO which_wm = 1, nwms
!$OMP CRITICAL (crit_data)
CALL dbcsr_data_release(all_data_areas(which_wm))
!$OMP END CRITICAL (crit_data)
END DO
!$OMP END MASTER
END IF
!$OMP BARRIER
!$OMP MASTER
DEALLOCATE (all_row_p)
DEALLOCATE (all_col_i)
DEALLOCATE (all_blk_p)
DEALLOCATE (new_row_p)
DEALLOCATE (new_col_i)
DEALLOCATE (new_blk_p)
DEALLOCATE (all_data_sizes)
DEALLOCATE (all_data_offsets)
IF (sort_data) THEN
DEALLOCATE (all_data_areas)
DEALLOCATE (new_blk_sizes)
DEALLOCATE (new_blk_p_sorted)
END IF
DEALLOCATE (blk_d)
!$OMP END MASTER
!$OMP BARRIER
IF (dbg) WRITE (*, *) routineN//" stopped"
CALL timestop(handle)
END SUBROUTINE dbcsr_merge_all