1#ifndef TTG_PARSEC_TASK_H
2#define TTG_PARSEC_TASK_H
6#include <parsec/parsec_internal.h>
7#include <parsec/mca/device/device_gpu.h>
16 parsec_gpu_exec_stream_t*
stream =
nullptr;
17 parsec_device_gpu_module_t*
device =
nullptr;
21 template<
bool SupportDevice>
36 static constexpr size_t num_flows = MAX_PARAM_COUNT;
56 using flags_type = std::underlying_type<ttg_parsec_data_flags>::type;
62 using flags_type = std::underlying_type<ttg_parsec_data_flags>::type;
69 using flags_type = std::underlying_type<ttg_parsec_data_flags>::type;
70 return static_cast<flags_type
>(lhs) &
static_cast<flags_type
>(rhs);
75 using flags_type = std::underlying_type<ttg_parsec_data_flags>::type;
82 using flags_type = std::underlying_type<ttg_parsec_data_flags>::type;
104 template<std::
size_t i = 0,
typename TT>
106 if constexpr (TT::numins > i) {
107 if (std::get<i>(tt->input_reducers)) {
108 streams[i].goal = tt->static_stream_goal[i];
110 PARSEC_OBJ_CONSTRUCT(&streams[i].reduce_copies, parsec_lifo_t);
111 streams[i].reduce_count.store(0, std::memory_order_relaxed);
114 if constexpr((i + 1) < TT::numins) {
115 init_stream_info_impl<i+1>(tt, streams);
120 template<
typename TT>
122 init_stream_info_impl<0>(tt, streams);
165 for (
int i = 0; i < MAX_PARAM_COUNT; ++i) {
166 this->parsec_task.data[i].data_in =
nullptr;
167 this->parsec_task.data[i].data_out =
nullptr;
172 parsec_taskpool_t *taskpool, int32_t priority,
190 for (
int i = 0; i < MAX_PARAM_COUNT; ++i) {
191 this->parsec_task.data[i].data_in =
nullptr;
192 this->parsec_task.data[i].data_out =
nullptr;
201 template <
typename TT,
bool KeyIsVo
id = ttg::meta::is_
void_v<
typename TT::key_type>>
210 std::array<stream_info_t, num_streams>
streams;
211#ifdef TTG_HAVE_COROUTINE
212 void* suspended_task_address =
nullptr;
229 parsec_task_class_t *task_class, parsec_taskpool_t *taskpool,
230 TT *tt_ptr, int32_t priority)
241 *(uintptr_t*)&(
parsec_task.locals[2]) =
reinterpret_cast<uintptr_t
>(&this->
key);
252 template<ttg::ExecutionSpace Space>
261 template<ttg::ExecutionSpace Space>
264 return PARSEC_HOOK_RETURN_DONE;
270 parsec_key_t
pkey() {
return reinterpret_cast<parsec_key_t
>(&
key); }
273 template <
typename TT>
277 std::array<stream_info_t, num_streams>
streams;
278#ifdef TTG_HAVE_COROUTINE
279 void* suspended_task_address =
nullptr;
294 parsec_taskpool_t *taskpool,
TT *tt_ptr, int32_t priority)
310 template<ttg::ExecutionSpace Space>
319 template<ttg::ExecutionSpace Space>
322 return PARSEC_HOOK_RETURN_DONE;
328 parsec_key_t
pkey() {
return 0; }
344 parsec_task_class_t *task_class, parsec_taskpool_t *taskpool,
354 for (
int i = 0; i < 4; ++i) {
361 parsec_task_t *vp_task_rings[1] = { &task_base->
parsec_task };
362 parsec_execution_stream_t *es = parsec_my_execution_stream();
363 __parsec_schedule_vp(es, vp_task_rings, 0);
void release_task(task_t *task, parsec_task_t **task_ring=nullptr)
static constexpr bool derived_has_device_op()
static resultT get(InTuple &&intuple)
uint8_t operator&(ttg_parsec_data_flags lhs, ttg_parsec_data_flags rhs)
parsec_hook_return_t(* parsec_static_op_t)(void *)
ttg_parsec_data_flags operator|=(ttg_parsec_data_flags &lhs, ttg_parsec_data_flags rhs)
ttg_parsec_data_flags operator&=(ttg_parsec_data_flags &lhs, ttg_parsec_data_flags rhs)
bool operator!(ttg_parsec_data_flags lhs)
ttg_parsec_data_flags operator|(ttg_parsec_data_flags lhs, ttg_parsec_data_flags rhs)
this contains PaRSEC-based TTG functionality
@ Invalid
not a coroutine, i.e. a standard task function, -> void
Computes hash values for objects of type T.
parsec_gpu_task_t * gpu_task
parsec_task_class_t task_class
parsec_gpu_exec_stream_t * stream
parsec_device_gpu_module_t * device
static constexpr device_ptr_t * dev_ptr()
static constexpr size_t num_flows
static constexpr bool support_device
std::atomic< std::size_t > reduce_count
parsec_lifo_t reduce_copies
parsec_task_t parsec_task
ttg_data_copy_t ** copies
parsec_ttg_task_base_t(parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, int data_count, ttg_data_copy_t **copies, bool defer_writer=TTG_PARSEC_DEFER_WRITER)
void() release_task_fn(parsec_ttg_task_base_t *)
void init_stream_info(TT *tt, std::array< stream_info_t, TT::numins > &streams)
void init_stream_info_impl(TT *tt, std::array< stream_info_t, TT::numins > &streams)
release_task_fn * release_task_cb
ttg_parsec_data_flags data_flags
parsec_hash_table_item_t tt_ht_item
parsec_ttg_task_base_t(parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, parsec_taskpool_t *taskpool, int32_t priority, int data_count, ttg_data_copy_t **copies, release_task_fn *release_fn, bool defer_writer=TTG_PARSEC_DEFER_WRITER)
parsec_ttg_task_t(parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, parsec_taskpool_t *taskpool, TT *tt_ptr, int32_t priority)
parsec_ttg_task_t(parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, TT *tt_ptr)
device_state_t< TT::derived_has_device_op()> dev_state
parsec_hook_return_t invoke_op()
std::array< stream_info_t, num_streams > streams
parsec_hook_return_t invoke_evaluate()
static void release_task(parsec_ttg_task_base_t *task_base)
static constexpr size_t num_copies
parsec_hook_return_t invoke_op()
parsec_ttg_task_t(parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, TT *tt_ptr)
std::array< stream_info_t, num_streams > streams
typename TT::key_type key_type
parsec_ttg_task_t(const key_type &key, parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, parsec_taskpool_t *taskpool, TT *tt_ptr, int32_t priority)
static constexpr size_t num_streams
parsec_hook_return_t invoke_evaluate()
device_state_t< TT::derived_has_device_op()> dev_state
static void release_task(parsec_ttg_task_base_t *task_base)
ttg_data_copy_t * copies[num_copies]
parsec_ttg_task_base_t * parent_task
reducer_task_t(parsec_ttg_task_base_t *task, parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, parsec_taskpool_t *taskpool, int32_t priority, bool is_first)
static void release_task(parsec_ttg_task_base_t *task_base)
#define TTG_PARSEC_DEFER_WRITER