#ifndef TTG_PARSEC_DEVICEFUNC_H
#define TTG_PARSEC_DEVICEFUNC_H

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <tuple>
#include <type_traits>
#include <utility>

#include <parsec/mca/device/device_gpu.h>
10 template<
typename... Views, std::size_t I, std::size_t... Is>
12 static_assert(I < MAX_PARAM_COUNT,
13 "PaRSEC only supports MAX_PARAM_COUNT device input/outputs. "
14 "Increase MAX_PARAM_COUNT and recompile PaRSEC/TTG.");
15 using view_type = std::remove_reference_t<std::tuple_element_t<I, std::tuple<Views&...>>>;
17 assert(
nullptr != caller->
dev_ptr);
21 auto& view = std::get<I>(views);
22 bool is_current =
false;
23 static_assert(ttg::meta::is_buffer_v<view_type> || ttg::meta::is_devicescratch_v<view_type>);
29 if (
nullptr !=
data) {
30 auto access = PARSEC_FLOW_ACCESS_RW;
31 if constexpr (std::is_const_v<view_type>) {
33 access = PARSEC_FLOW_ACCESS_READ;
34 }
else if constexpr (ttg::meta::is_devicescratch_v<view_type>) {
36 access = PARSEC_FLOW_ACCESS_WRITE;
42 flows[I] = parsec_flow_t{.name =
nullptr,
43 .sym_type = PARSEC_SYM_INOUT,
44 .flow_flags =
static_cast<uint8_t
>(access),
46 .flow_datatype_mask = ~0 };
48 gpu_task->flow_nb_elts[I] =
data->nb_elts;
49 gpu_task->flow[I] = &flows[I];
54 assert(
nullptr !=
data->device_copies[0]->original);
56 caller->
parsec_task.data[I].source_repo_entry = NULL;
60 flows[I] = parsec_flow_t{.name =
nullptr,
61 .sym_type = PARSEC_FLOW_ACCESS_NONE,
64 .flow_datatype_mask = ~0 };
65 gpu_task->flow[I] = &flows[I];
66 gpu_task->flow_nb_elts[I] = 0;
70 if constexpr (
sizeof...(Is) > 0) {
80 template<
typename... Views>
82 bool is_current =
true;
84 throw std::runtime_error(
"register_device_memory may only be invoked from inside a task!");
88 throw std::runtime_error(
"register_device_memory called inside a non-gpu task!");
91 if constexpr (
sizeof...(Views) > 0) {
96 for (
int i =
sizeof...(Views); i < MAX_PARAM_COUNT; ++i) {
108 template<
typename T, std::
size_t N>
113 throw std::runtime_error(
"register_device_memory may only be invoked from inside a task!");
117 throw std::runtime_error(
"register_device_memory called inside a non-gpu task!");
122 assert(
nullptr != caller->
dev_ptr);
126 bool is_current =
false;
127 for (i = 0; i < span.size(); ++i) {
129 parsec_data_t*
data = span[i].impl_data;
132 bool is_scratch = span[i].is_scratch;
134 if (
nullptr !=
data) {
135 auto access = PARSEC_FLOW_ACCESS_RW;
137 access = PARSEC_FLOW_ACCESS_WRITE;
139 access = PARSEC_FLOW_ACCESS_READ;
149 flows[i] = parsec_flow_t{.name =
nullptr,
150 .sym_type = PARSEC_SYM_INOUT,
151 .flow_flags =
static_cast<uint8_t
>(access),
153 .flow_datatype_mask = ~0 };
155 gpu_task->flow_nb_elts[i] =
data->nb_elts;
156 gpu_task->flow[i] = &flows[i];
161 assert(
nullptr !=
data->device_copies[0]->original);
163 caller->
parsec_task.data[i].source_repo_entry = NULL;
167 flows[i] = parsec_flow_t{.name =
nullptr,
168 .sym_type = PARSEC_FLOW_ACCESS_NONE,
171 .flow_datatype_mask = ~0 };
172 gpu_task->flow[i] = &flows[i];
173 gpu_task->flow_nb_elts[i] = 0;
179 for (; i < MAX_PARAM_COUNT; ++i) {
191 template<
typename... Views, std::size_t I, std::size_t... Is,
bool DeviceAvail =
false>
194 using view_type = std::remove_reference_t<std::tuple_element_t<I, std::tuple<Views&...>>>;
195 auto& view = std::get<I>(views);
202 if (
data->owner_device != 0) {
204 int ret = device_module->memcpy_async(device_module, stream,
205 data->device_copies[0]->device_private,
206 data->device_copies[
data->owner_device]->device_private,
207 data->nb_elts, parsec_device_gpu_transfer_direction_d2h);
208 assert(ret == PARSEC_SUCCESS);
210 if constexpr (
sizeof...(Is) > 0) {
217 template<
typename...
Buffer>
221 throw std::runtime_error(
"mark_device_out may only be invoked from inside a task!");
225 throw std::runtime_error(
"mark_device_out called inside a non-gpu task!");
233 template<
typename... Views, std::size_t I, std::size_t... Is>
236 using view_type = std::remove_reference_t<std::tuple_element_t<I, std::tuple<Views&...>>>;
238 if constexpr (!std::is_const_v<view_type>) {
239 auto& view = std::get<I>(views);
243 data->device_copies[0]->version =
data->device_copies[
data->owner_device]->version;
244 parsec_data_transfer_ownership_to_copy(
data, 0, PARSEC_FLOW_ACCESS_READ);
247 if constexpr (
sizeof...(Is) > 0) {
254 template<
typename...
Buffer>
261 using view_type = std::remove_reference_t<T>;
262 static_assert(ttg::meta::is_buffer_v<view_type> || ttg::meta::is_devicescratch_v<view_type>);
// NOTE(review): the lines below are documentation-generator residue (a member
// index extracted from rendered Doxygen output), not compilable source. They
// are preserved verbatim as a comment for cross-reference; remove once the
// file has been verified against the upstream TTG header.
//   constexpr auto data(C &c) -> decltype(c.data())
//   std::integral_constant< bool,(Flags &const_) !=0 > is_const
//   void mark_device_out(std::tuple< Views &... > &views, std::index_sequence< I, Is... >)
//   bool register_device_memory(std::tuple< Views &... > &views, std::index_sequence< I, Is... >)
//   void post_device_out(std::tuple< Views &... > &views, std::index_sequence< I, Is... >)
//   parsec_data_t * get_parsec_data(const ttg_parsec::Buffer< T, A > &db)
//   thread_local parsec_ttg_task_base_t * parsec_ttg_caller
//   this contains PaRSEC-based TTG functionality
//   void post_device_out(std::tuple< Buffer &... > &b)
//   parsec_data_t * buffer_data(T &&buffer)
//   bool register_device_memory(std::tuple< Views &... > &views)
//   void mark_device_out(std::tuple< Buffer &... > &b)
//   TTG_IMPL_NS::Buffer< T, Allocator > Buffer
//   #define TTG_PARSEC_FLOW_ACCESS_TMP
//   parsec_gpu_task_t * gpu_task
//   parsec_gpu_exec_stream_t * stream
//   parsec_device_gpu_module_t * device
//   parsec_task_t parsec_task