13 static_assert(I < MAX_PARAM_COUNT,
14 "PaRSEC only supports MAX_PARAM_COUNT device input/outputs. "
15 "Increase MAX_PARAM_COUNT and recompile PaRSEC/TTG.");
16 using view_type = std::remove_reference_t<std::tuple_element_t<I, std::tuple<Views&...>>>;
18 assert(
nullptr != caller->
dev_ptr);
22 auto& view = std::get<I>(views);
23 bool is_current =
false;
30 if (
nullptr != data) {
31 auto access = PARSEC_FLOW_ACCESS_RW;
32 if constexpr (std::is_const_v<view_type>) {
34 access = PARSEC_FLOW_ACCESS_READ;
36 access = PARSEC_FLOW_ACCESS_WRITE;
41 flows[I] = parsec_flow_t{.name =
nullptr,
42 .sym_type = PARSEC_SYM_INOUT,
43 .flow_flags =
static_cast<uint8_t
>(access),
45 .flow_datatype_mask = ~0 };
47 gpu_task->flow_nb_elts[I] = data->nb_elts;
48 gpu_task->flow[I] = &flows[I];
53 assert(
nullptr != data->device_copies[0]->original);
54 caller->
parsec_task.data[I].data_in = data->device_copies[0];
55 caller->
parsec_task.data[I].source_repo_entry = NULL;
59 flows[I] = parsec_flow_t{.name =
nullptr,
60 .sym_type = PARSEC_FLOW_ACCESS_NONE,
63 .flow_datatype_mask = ~0 };
64 gpu_task->flow[I] = &flows[I];
65 gpu_task->flow_nb_elts[I] = 0;
69 if constexpr (
sizeof...(Is) > 0) {
// NOTE(review): this is a gapped excerpt. The embedded source line numbers
// (112..193) skip lines, so the function signature, several branch conditions
// and every closing brace are not visible. The comments below describe only
// the statements that are shown; anything else is marked as an assumption.

// Raised with a message saying the call was made outside of a task
// (the guarding condition is not shown).
112 throw std::runtime_error(
"register_device_memory may only be invoked from inside a task!");
// Raised with a message saying the enclosing task is not a GPU task
// (the guarding condition is not shown).
116 throw std::runtime_error(
"register_device_memory called inside a non-gpu task!");
// Debug-only check that the calling task has a non-null dev_ptr.
121 assert(
nullptr != caller->
dev_ptr);
// Flag initialised to false; where it is updated or read is not visible here.
125 bool is_current =
false;
// Visit every entry of `span`. `i` is assigned, not declared, in this loop
// header, so it is declared somewhere above this excerpt.
126 for (i = 0; i < span.size(); ++i) {
// Per-entry fields read from the span element.
127 parsec_data_t* data = span[i].impl_data;
129 bool is_const = span[i].is_const;
130 bool is_scratch = span[i].is_scratch;
// Entries that carry a parsec_data_t get a populated flow descriptor.
132 if (
nullptr != data) {
// Access mode starts as read-write and is overridden by one of the two
// assignments below. Their selecting conditions are not shown --
// presumably is_const -> READ and is_scratch -> WRITE; TODO confirm.
133 auto access = PARSEC_FLOW_ACCESS_RW;
135 access = PARSEC_FLOW_ACCESS_READ;
137 access = PARSEC_FLOW_ACCESS_WRITE;
// Flow descriptor for entry i: no name, PARSEC_SYM_INOUT symbol type, the
// chosen access mode narrowed to uint8_t as flow_flags, and an all-ones
// datatype mask. (Initialisers on the skipped source lines are not shown.)
148 flows[i] = parsec_flow_t{.name =
nullptr,
149 .sym_type = PARSEC_SYM_INOUT,
150 .flow_flags =
static_cast<uint8_t
>(access),
152 .flow_datatype_mask = ~0 };
// Record the data's element count and point the GPU task at the descriptor.
154 gpu_task->flow_nb_elts[i] = data->nb_elts;
155 gpu_task->flow[i] = &flows[i];
// device_copies[0] becomes the task's input copy for slot i; it is expected
// to have its `original` pointer set (checked in debug builds only).
160 assert(
nullptr != data->device_copies[0]->original);
161 caller->
parsec_task.data[i].data_in = data->device_copies[0];
// The input slot is given no source repo entry.
162 caller->
parsec_task.data[i].source_repo_entry = NULL;
// data->lock is acquired and released here; the statements executed while it
// is held are on source lines missing from this excerpt.
167 parsec_atomic_lock(&data->lock);
173 parsec_atomic_unlock(&data->lock);
// Descriptor written with zero elements -- presumably the branch taken when
// `data` is null (the `else` line is not shown; TODO confirm).
// NOTE(review): sym_type is given PARSEC_FLOW_ACCESS_NONE, a flow-access
// constant, while the populated branch above uses a PARSEC_SYM_* constant.
// Confirm that mixing the two constant families is intended.
181 flows[i] = parsec_flow_t{.name =
nullptr,
182 .sym_type = PARSEC_FLOW_ACCESS_NONE,
185 .flow_datatype_mask = ~0 };
186 gpu_task->flow[i] = &flows[i];
187 gpu_task->flow_nb_elts[i] = 0;
// Second loop with no initialiser: it starts from whatever value `i` holds at
// this point and runs up to MAX_PARAM_COUNT. Its body is not shown.
193 for (; i < MAX_PARAM_COUNT; ++i) {
// NOTE(review): this is a gapped excerpt. The embedded source line numbers
// (205..229) skip lines, so the function name, its parameter list, the
// definition of `data`, `device_module` and `stream`, and all closing braces
// are not visible. Comments describe only the statements shown.

// Template parameters: the pack of view types, the index I handled by this
// instantiation, the remaining indices Is, and a DeviceAvail flag that
// defaults to false.
205 template<
typename... Views, std::size_t I, std::size_t... Is,
bool DeviceAvail =
false>
// Type of the I-th view with its reference qualifier removed.
208 using view_type = std::remove_reference_t<std::tuple_element_t<I, std::tuple<Views&...>>>;
// The I-th element of `views`.
209 auto& view = std::get<I>(views);
// Act only when there is data and its owner_device is not 0
// (presumably device 0 is the host -- TODO confirm).
216 if (
nullptr != data && data->owner_device != 0) {
// Copy 0 has no device_private buffer yet: call its alloc_cb, which must be
// set (checked in debug builds only). Presumably the callback provides the
// buffer used as the copy destination below -- TODO confirm.
218 if (
nullptr == data->device_copies[0]->device_private) {
219 assert(
nullptr != data->device_copies[0]->alloc_cb);
220 data->device_copies[0]->alloc_cb(data->device_copies[0], 0);
// Ask the device module's memcpy_async to transfer nb_elts from the owner
// device's copy into copy 0 on `stream`, using the d2h transfer direction.
223 int ret = device_module->memcpy_async(device_module, stream,
224 data->device_copies[0]->device_private,
225 data->device_copies[data->owner_device]->device_private,
226 data->nb_elts, parsec_device_gpu_transfer_direction_d2h);
// Any result other than PARSEC_SUCCESS is surfaced as a std::runtime_error.
227 if (ret != PARSEC_SUCCESS)
throw std::runtime_error(
"Failed to copy data from device to host!");
// Compile-time branch taken when further indices remain; its body is not
// shown (presumably it continues with Is... -- TODO confirm).
229 if constexpr (
sizeof...(Is) > 0) {
255 using view_type = std::remove_reference_t<std::tuple_element_t<I, std::tuple<Views&...>>>;
257 if constexpr (!std::is_const_v<view_type>) {
258 auto& view = std::get<I>(views);
262 data->device_copies[0]->version = data->device_copies[data->owner_device]->version;
263 parsec_data_transfer_ownership_to_copy(data, 0, PARSEC_FLOW_ACCESS_READ);
266 if constexpr (
sizeof...(Is) > 0) {