1 #ifndef TTG_PARSEC_DEVICEFUNC_H
2 #define TTG_PARSEC_DEVICEFUNC_H
4 #if defined(TTG_HAVE_CUDART)
10 #include <parsec/mca/device/device_gpu.h>
12 #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
13 #include <parsec/mca/device/cuda/device_cuda.h>
14 #elif defined(PARSEC_HAVE_DEV_HIP_SUPPORT)
15 #include <parsec/mca/device/hip/device_hip.h>
// Helper that registers the I-th element of `views` (a ttg Buffer or devicescratch)
// as a PaRSEC device flow: selects the access mode, fills flows[I] and
// gpu_task->flow[I], then recurses over the remaining pack Is... .
// NOTE(review): this span is an incomplete extraction — the enclosing function
// signature (original line 21) and several body statements are missing; the
// comments below describe only what is visible.
20 template<
typename... Views, std::size_t I, std::size_t... Is>
// PaRSEC's per-task flow table is fixed-size: more device inputs/outputs than
// MAX_PARAM_COUNT cannot be expressed without recompiling PaRSEC/TTG.
22 static_assert(I < MAX_PARAM_COUNT,
23 "PaRSEC only supports MAX_PARAM_COUNT device input/outputs. "
24 "Increase MAX_PARAM_COUNT and recompile PaRSEC/TTG.");
// Concrete type of the I-th view (reference stripped); drives the access-mode choice below.
25 using view_type = std::remove_reference_t<std::tuple_element_t<I, std::tuple<Views&...>>>;
// The calling task must carry a device pointer, i.e. be a device task.
27 assert(
nullptr != caller->
dev_ptr);
31 auto& view = std::get<I>(views);
// Tracks whether the device copy is already up to date; starts pessimistic.
32 bool is_current =
false;
// Only ttg Buffers and devicescratch objects may be registered as device memory.
33 static_assert(ttg::meta::is_buffer_v<view_type> || ttg::meta::is_devicescratch_v<view_type>);
// Access mode: read-write by default, read-only for const views,
// write-only for devicescratch (no meaningful prior contents).
38 auto access = PARSEC_FLOW_ACCESS_RW;
39 if constexpr (std::is_const_v<view_type>) {
42 access = PARSEC_FLOW_ACCESS_READ;
44 }
else if constexpr (ttg::meta::is_devicescratch_v<view_type>) {
46 access = PARSEC_FLOW_ACCESS_WRITE;
// Describe this view to PaRSEC as flow I of the GPU task.
55 flows[I] = parsec_flow_t{.name =
nullptr,
56 .sym_type = PARSEC_SYM_INOUT,
57 .flow_flags =
static_cast<uint8_t
>(access),
59 .flow_datatype_mask = ~0 };
61 gpu_task->flow_nb_elts[I] =
data->nb_elts;
62 gpu_task->flow[I] = &flows[I];
// The copy in slot 0 must be backed by a parsec_data_t
// (slot 0 is the host copy — see the d2h transfer in mark_device_out below).
67 assert(
nullptr !=
data->device_copies[0]->original);
// This input is provided directly, not pulled from a repo entry.
69 caller->
parsec_task.data[I].source_repo_entry = NULL;
// Recurse for the remaining views, if any.
71 if constexpr (
sizeof...(Is) > 0) {
/// Public entry point: registers every element of `views` as device memory for
/// the currently executing task, delegating per-element work to the
/// index-sequence helper above.
/// NOTE(review): incomplete extraction — the function signature, the conditions
/// guarding the two throws, and the return are missing from this view.
81 template<
typename... Views>
// Assume all device copies are current until the per-view helper says otherwise.
83 bool is_current =
true;
// Reject calls made outside of any task context...
85 throw std::runtime_error(
"register_device_memory may only be invoked from inside a task!");
// ...and calls from tasks that are not executing on a GPU device.
89 throw std::runtime_error(
"register_device_memory called inside a non-gpu task!");
92 if constexpr (
sizeof...(Views) > 0) {
// Walk the unused tail of PaRSEC's fixed-size flow table
// (loop body not visible in this extraction — presumably clears those slots).
97 for (
int i =
sizeof...(Views); i < MAX_PARAM_COUNT; ++i) {
/// Helper: stages the I-th view for device→host write-back by enqueueing an
/// asynchronous D2H copy from the owner device's copy into the host (slot 0)
/// copy, then recurses over the remaining indices.
/// NOTE(review): incomplete extraction — original lines 114-121 (how `data`,
/// `device_module` and `stream` are obtained, and any up-to-date short-circuit)
/// are missing from this view.
109 template<
typename... Views, std::size_t I, std::size_t... Is,
bool DeviceAvail =
false>
110 inline void mark_device_out(std::tuple<Views&...> &views, std::index_sequence<I, Is...>) {
112 using view_type = std::remove_reference_t<std::tuple_element_t<I, std::tuple<Views&...>>>;
113 auto& view = std::get<I>(views);
// Async device-to-host transfer of data->nb_elts (size as tracked by PaRSEC):
// source is the owner device's copy, destination is the host copy (slot 0).
122 device_module->memcpy_async(device_module, stream,
123 data->device_copies[0]->device_private,
124 data->device_copies[
data->owner_device]->device_private,
125 data->nb_elts, parsec_device_gpu_transfer_direction_d2h);
// Process the remaining views, if any.
127 if constexpr (
sizeof...(Is) > 0) {
/// Public entry point: marks the given buffers for device→host write-back.
/// NOTE(review): only the argument-validation throws survive in this extraction;
/// the guarding conditions and the delegation to the helper above are missing.
134 template<
typename...
Buffer>
// Must run inside a task context...
138 throw std::runtime_error(
"mark_device_out may only be invoked from inside a task!");
// ...and that task must be executing on a GPU device.
142 throw std::runtime_error(
"mark_device_out called inside a non-gpu task!");
/// Helper: after device execution, publishes each written (i.e. non-const) view
/// back to the host copy — the host copy's version is bumped to match the owner
/// device's copy, and read ownership of copy 0 is handed over to PaRSEC.
/// NOTE(review): incomplete extraction — how `data` is obtained from `view`
/// (original lines 157-159) is missing from this view.
150 template<
typename... Views, std::size_t I, std::size_t... Is>
151 inline void post_device_out(std::tuple<Views&...> &views, std::index_sequence<I, Is...>) {
153 using view_type = std::remove_reference_t<std::tuple_element_t<I, std::tuple<Views&...>>>;
// Const views were read-only on the device; there is nothing to publish back.
155 if constexpr (!std::is_const_v<view_type>) {
156 auto& view = std::get<I>(views);
// Mark the host copy as reflecting the device-side modifications.
160 data->device_copies[0]->version =
data->device_copies[
data->owner_device]->version;
// Hand ownership of the (now current) host copy back for reading.
161 parsec_data_transfer_ownership_to_copy(
data, 0, PARSEC_FLOW_ACCESS_READ);
// Recurse for the remaining views, if any.
164 if constexpr (
sizeof...(Is) > 0) {
170 template<
typename...
Buffer>
constexpr auto data(C &c) -> decltype(c.data())
void mark_device_out(std::tuple< Views &... > &views, std::index_sequence< I, Is... >)
bool register_device_memory(std::tuple< Views &... > &views, std::index_sequence< I, Is... >)
void post_device_out(std::tuple< Views &... > &views, std::index_sequence< I, Is... >)
parsec_data_t * get_parsec_data(const ttg_parsec::Buffer< T, A > &db)
thread_local parsec_ttg_task_base_t * parsec_ttg_caller
this contains PaRSEC-based TTG functionality
void post_device_out(std::tuple< Buffer &... > &b)
bool register_device_memory(std::tuple< Views &... > &views)
void mark_device_out(std::tuple< Buffer &... > &b)
TTG_IMPL_NS::Buffer< T, Allocator > Buffer
parsec_gpu_task_t * gpu_task
parsec_gpu_exec_stream_t * stream
parsec_device_gpu_module_t * device
parsec_task_t parsec_task