2#ifndef TTG_PARSEC_TASK_H
3#define TTG_PARSEC_TASK_H
7#include <parsec/parsec_internal.h>
8#include <parsec/mca/device/device_gpu.h>
17 parsec_gpu_exec_stream_t*
stream =
nullptr;
18 parsec_device_gpu_module_t*
device =
nullptr;
22 template<
bool SupportDevice>
37 static constexpr size_t num_flows = MAX_PARAM_COUNT;
57 using flags_type = std::underlying_type<ttg_parsec_data_flags>::type;
63 using flags_type = std::underlying_type<ttg_parsec_data_flags>::type;
70 using flags_type = std::underlying_type<ttg_parsec_data_flags>::type;
71 return static_cast<flags_type
>(lhs) &
static_cast<flags_type
>(rhs);
76 using flags_type = std::underlying_type<ttg_parsec_data_flags>::type;
83 using flags_type = std::underlying_type<ttg_parsec_data_flags>::type;
105 template<std::
size_t i = 0,
typename TT>
107 if constexpr (TT::numins > i) {
108 if (std::get<i>(tt->input_reducers)) {
109 streams[i].goal = tt->static_stream_goal[i];
111 PARSEC_OBJ_CONSTRUCT(&streams[i].reduce_copies, parsec_lifo_t);
112 streams[i].reduce_count.store(0, std::memory_order_relaxed);
115 if constexpr((i + 1) < TT::numins) {
116 init_stream_info_impl<i+1>(tt, streams);
121 template<
typename TT>
123 init_stream_info_impl<0>(tt, streams);
166 for (
int i = 0; i < MAX_PARAM_COUNT; ++i) {
167 this->parsec_task.data[i].data_in =
nullptr;
168 this->parsec_task.data[i].data_out =
nullptr;
173 parsec_taskpool_t *taskpool, int32_t priority,
191 for (
int i = 0; i < MAX_PARAM_COUNT; ++i) {
192 this->parsec_task.data[i].data_in =
nullptr;
193 this->parsec_task.data[i].data_out =
nullptr;
202 template <
typename TT,
bool KeyIsVo
id = ttg::meta::is_
void_v<
typename TT::key_type>>
211 std::array<stream_info_t, num_streams>
streams;
212#ifdef TTG_HAVE_COROUTINE
213 void* suspended_task_address =
nullptr;
230 parsec_task_class_t *task_class, parsec_taskpool_t *taskpool,
231 TT *tt_ptr, int32_t priority)
242 *(uintptr_t*)&(
parsec_task.locals[2]) =
reinterpret_cast<uintptr_t
>(&this->
key);
253 template<ttg::ExecutionSpace Space>
262 template<ttg::ExecutionSpace Space>
265 return PARSEC_HOOK_RETURN_DONE;
271 parsec_key_t
pkey() {
return reinterpret_cast<parsec_key_t
>(&
key); }
274 template <
typename TT>
278 std::array<stream_info_t, num_streams>
streams;
279#ifdef TTG_HAVE_COROUTINE
280 void* suspended_task_address =
nullptr;
295 parsec_taskpool_t *taskpool,
TT *tt_ptr, int32_t priority)
311 template<ttg::ExecutionSpace Space>
320 template<ttg::ExecutionSpace Space>
323 return PARSEC_HOOK_RETURN_DONE;
329 parsec_key_t
pkey() {
return 0; }
345 parsec_task_class_t *task_class, parsec_taskpool_t *taskpool,
355 for (
int i = 0; i < 4; ++i) {
362 parsec_task_t *vp_task_rings[1] = { &task_base->
parsec_task };
363 parsec_execution_stream_t *es = parsec_my_execution_stream();
364 __parsec_schedule_vp(es, vp_task_rings, 0);
void release_task(task_t *task, parsec_task_t **task_ring=nullptr)
static constexpr bool derived_has_device_op()
static resultT get(InTuple &&intuple)
uint8_t operator&(ttg_parsec_data_flags lhs, ttg_parsec_data_flags rhs)
parsec_hook_return_t(* parsec_static_op_t)(void *)
ttg_parsec_data_flags operator|=(ttg_parsec_data_flags &lhs, ttg_parsec_data_flags rhs)
ttg_parsec_data_flags operator&=(ttg_parsec_data_flags &lhs, ttg_parsec_data_flags rhs)
bool operator!(ttg_parsec_data_flags lhs)
ttg_parsec_data_flags operator|(ttg_parsec_data_flags lhs, ttg_parsec_data_flags rhs)
this contains PaRSEC-based TTG functionality
@ Invalid
not a coroutine, i.e. a standard task function, -> void
Computes hash values for objects of type T.
parsec_gpu_task_t * gpu_task
parsec_task_class_t task_class
parsec_gpu_exec_stream_t * stream
parsec_device_gpu_module_t * device
static constexpr device_ptr_t * dev_ptr()
static constexpr size_t num_flows
static constexpr bool support_device
std::atomic< std::size_t > reduce_count
parsec_lifo_t reduce_copies
parsec_task_t parsec_task
ttg_data_copy_t ** copies
parsec_ttg_task_base_t(parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, int data_count, ttg_data_copy_t **copies, bool defer_writer=TTG_PARSEC_DEFER_WRITER)
void() release_task_fn(parsec_ttg_task_base_t *)
void init_stream_info(TT *tt, std::array< stream_info_t, TT::numins > &streams)
void init_stream_info_impl(TT *tt, std::array< stream_info_t, TT::numins > &streams)
release_task_fn * release_task_cb
ttg_parsec_data_flags data_flags
parsec_hash_table_item_t tt_ht_item
parsec_ttg_task_base_t(parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, parsec_taskpool_t *taskpool, int32_t priority, int data_count, ttg_data_copy_t **copies, release_task_fn *release_fn, bool defer_writer=TTG_PARSEC_DEFER_WRITER)
parsec_ttg_task_t(parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, parsec_taskpool_t *taskpool, TT *tt_ptr, int32_t priority)
parsec_ttg_task_t(parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, TT *tt_ptr)
device_state_t< TT::derived_has_device_op()> dev_state
parsec_hook_return_t invoke_op()
std::array< stream_info_t, num_streams > streams
parsec_hook_return_t invoke_evaluate()
static void release_task(parsec_ttg_task_base_t *task_base)
static constexpr size_t num_copies
parsec_hook_return_t invoke_op()
parsec_ttg_task_t(parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, TT *tt_ptr)
std::array< stream_info_t, num_streams > streams
typename TT::key_type key_type
parsec_ttg_task_t(const key_type &key, parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, parsec_taskpool_t *taskpool, TT *tt_ptr, int32_t priority)
static constexpr size_t num_streams
parsec_hook_return_t invoke_evaluate()
device_state_t< TT::derived_has_device_op()> dev_state
static void release_task(parsec_ttg_task_base_t *task_base)
ttg_data_copy_t * copies[num_copies]
parsec_ttg_task_base_t * parent_task
reducer_task_t(parsec_ttg_task_base_t *task, parsec_thread_mempool_t *mempool, parsec_task_class_t *task_class, parsec_taskpool_t *taskpool, int32_t priority, bool is_first)
static void release_task(parsec_ttg_task_base_t *task_base)
#define TTG_PARSEC_DEFER_WRITER