#ifndef TTG_PARSEC_DEVICEFUNC_H
#define TTG_PARSEC_DEVICEFUNC_H

// Standard-library dependencies used by the templates in this header.
#include <cassert>
#include <cstdint>
#include <stdexcept>
#include <tuple>
#include <type_traits>
#include <utility>

#if defined(TTG_HAVE_CUDART)
// NOTE(review): the include originally guarded here was lost in extraction
// (likely <cuda.h> or <cuda_runtime.h>) — restore from upstream TTG.
#endif  // TTG_HAVE_CUDART

#include <parsec/mca/device/device_gpu.h>

#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
#include <parsec/mca/device/cuda/device_cuda.h>
#elif defined(PARSEC_HAVE_DEV_HIP_SUPPORT)
#include <parsec/mca/device/hip/device_hip.h>
#endif  // PARSEC_HAVE_DEV_CUDA_SUPPORT
20 template<
typename... Views, std::size_t I, std::size_t... Is>
22 static_assert(I < MAX_PARAM_COUNT,
23 "PaRSEC only supports MAX_PARAM_COUNT device input/outputs. "
24 "Increase MAX_PARAM_COUNT and recompile PaRSEC/TTG.");
25 using view_type = std::remove_reference_t<std::tuple_element_t<I, std::tuple<Views&...>>>;
27 assert(
nullptr != caller->
dev_ptr);
31 auto& view = std::get<I>(views);
32 bool is_current =
false;
33 static_assert(ttg::meta::is_buffer_v<view_type> || ttg::meta::is_devicescratch_v<view_type>);
39 if (
nullptr !=
data) {
40 auto access = PARSEC_FLOW_ACCESS_RW;
41 if constexpr (std::is_const_v<view_type>) {
43 access = PARSEC_FLOW_ACCESS_READ;
44 }
else if constexpr (ttg::meta::is_devicescratch_v<view_type>) {
46 access = PARSEC_FLOW_ACCESS_WRITE;
52 flows[I] = parsec_flow_t{.name =
nullptr,
53 .sym_type = PARSEC_SYM_INOUT,
54 .flow_flags =
static_cast<uint8_t
>(access),
56 .flow_datatype_mask = ~0 };
58 gpu_task->flow_nb_elts[I] =
data->nb_elts;
59 gpu_task->flow[I] = &flows[I];
64 assert(
nullptr !=
data->device_copies[0]->original);
66 caller->
parsec_task.data[I].source_repo_entry = NULL;
70 flows[I] = parsec_flow_t{.name =
nullptr,
71 .sym_type = PARSEC_FLOW_ACCESS_NONE,
74 .flow_datatype_mask = ~0 };
75 gpu_task->flow[I] = &flows[I];
76 gpu_task->flow_nb_elts[I] = 0;
80 if constexpr (
sizeof...(Is) > 0) {
90 template<
typename... Views>
92 bool is_current =
true;
94 throw std::runtime_error(
"register_device_memory may only be invoked from inside a task!");
98 throw std::runtime_error(
"register_device_memory called inside a non-gpu task!");
101 if constexpr (
sizeof...(Views) > 0) {
106 for (
int i =
sizeof...(Views); i < MAX_PARAM_COUNT; ++i) {
118 template<
typename... Views, std::size_t I, std::size_t... Is,
bool DeviceAvail =
false>
119 inline void mark_device_out(std::tuple<Views&...> &views, std::index_sequence<I, Is...>) {
121 using view_type = std::remove_reference_t<std::tuple_element_t<I, std::tuple<Views&...>>>;
122 auto& view = std::get<I>(views);
129 if (
data->owner_device != 0) {
131 device_module->memcpy_async(device_module, stream,
132 data->device_copies[0]->device_private,
133 data->device_copies[
data->owner_device]->device_private,
134 data->nb_elts, parsec_device_gpu_transfer_direction_d2h);
136 if constexpr (
sizeof...(Is) > 0) {
143 template<
typename...
Buffer>
147 throw std::runtime_error(
"mark_device_out may only be invoked from inside a task!");
151 throw std::runtime_error(
"mark_device_out called inside a non-gpu task!");
159 template<
typename... Views, std::size_t I, std::size_t... Is>
160 inline void post_device_out(std::tuple<Views&...> &views, std::index_sequence<I, Is...>) {
162 using view_type = std::remove_reference_t<std::tuple_element_t<I, std::tuple<Views&...>>>;
164 if constexpr (!std::is_const_v<view_type>) {
165 auto& view = std::get<I>(views);
169 data->device_copies[0]->version =
data->device_copies[
data->owner_device]->version;
170 parsec_data_transfer_ownership_to_copy(
data, 0, PARSEC_FLOW_ACCESS_READ);
173 if constexpr (
sizeof...(Is) > 0) {
179 template<
typename...
Buffer>
// ---------------------------------------------------------------------------
// NOTE(review): the lines below are residue of a generated documentation
// (Doxygen) index that was fused into this header during extraction; they are
// not compilable C++. Preserved here, commented out, as a summary of the API:
//
//   constexpr auto data(C &c) -> decltype(c.data())
//   void mark_device_out(std::tuple<Views&...> &views, std::index_sequence<I, Is...>)
//   bool register_device_memory(std::tuple<Views&...> &views, std::index_sequence<I, Is...>)
//   void post_device_out(std::tuple<Views&...> &views, std::index_sequence<I, Is...>)
//   parsec_data_t *get_parsec_data(const ttg_parsec::Buffer<T, A> &db)
//   thread_local parsec_ttg_task_base_t *parsec_ttg_caller
//   (brief: this contains PaRSEC-based TTG functionality)
//   void post_device_out(std::tuple<Buffer&...> &b)
//   bool register_device_memory(std::tuple<Views&...> &views)
//   void mark_device_out(std::tuple<Buffer&...> &b)
//   using Buffer = TTG_IMPL_NS::Buffer<T, Allocator>
//   parsec_gpu_task_t *gpu_task
//   parsec_gpu_exec_stream_t *stream
//   parsec_device_gpu_module_t *device
//   parsec_task_t parsec_task
// ---------------------------------------------------------------------------