2#ifndef TTG_PARSEC_BUFFER_H
3#define TTG_PARSEC_BUFFER_H
9#include <parsec/data_internal.h>
10#include <parsec/mca/device/device.h>
11#include <parsec/mca/device/device_gpu.h>
12#include <parsec/utils/zone_malloc.h>
20#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
21#include <cuda_runtime.h>
29 template<
typename T,
typename A>
38 throw std::runtime_error(
"Allocate on empty allocator!");
61 template<
typename PtrT,
typename Allocator>
65 using value_type =
typename allocator_traits::value_type;
85 m_ptr = std::shared_ptr<value_type[]>(allocator_traits::allocate(m_allocator, m_size),
86 [allocator = m_allocator, size = m_size](
value_type*
ptr)
mutable {
87 allocator_traits::deallocate(allocator,
ptr, size);
91 m_ptr = std::make_shared<value_type[]>(m_size);
93 this->device_private = m_ptr.get();
96 void do_deallocate() {
97 if (this->device_private !=
nullptr) {
98 auto ptr = std::move(m_ptr);
99 this->device_private =
nullptr;
101 this->coherency_state = PARSEC_DATA_COHERENCY_INVALID;
106 static void allocate(parsec_data_copy_t *parsec_copy,
int device) {
111 static void deallocate(parsec_data_copy_t *parsec_copy,
int device) {
113 copy->do_deallocate();
125 template<
typename Ptr>
128 constexpr const bool is_empty_allocator = std::is_same_v<Allocator, empty_allocator<value_type>>;
129 assert(is_empty_allocator);
130 m_ptr = std::move(
ptr);
132 this->dtt = parsec_datatype_int8_t;
136 template<
typename AllocatorT = allocator_type>
139 AllocatorT&& alloc = AllocatorT()) {
140 constexpr const bool is_empty_allocator = std::is_same_v<AllocatorT, empty_allocator<value_type>>;
141 assert(!is_empty_allocator);
142 m_allocator = std::forward<AllocatorT>(alloc);
144 this->dtt = parsec_datatype_int8_t;
153 this->alloc_cb = &allocate;
154 this->release_cb = &deallocate;
155 this->device_private =
nullptr;
160 this->alloc_cb =
nullptr;
161 this->release_cb =
nullptr;
162 this->do_deallocate();
186 parsec_data_t *data = PARSEC_OBJ_NEW(parsec_data_t);
187 data->owner_device = 0;
192 copy->construct(size, scope, allocator);
193 parsec_data_copy_attach(data, copy, 0);
196 data->device_copies[0]->flags |= PARSEC_DATA_FLAG_PARSEC_MANAGED;
198 data->device_copies[0]->coherency_state = PARSEC_DATA_COHERENCY_EXCLUSIVE;
199 data->device_copies[0]->version = 1;
201 data->device_copies[0]->coherency_state = PARSEC_DATA_COHERENCY_INVALID;
203 data->device_copies[0]->version = 0;
210 parsec_data_t *data = PARSEC_OBJ_NEW(parsec_data_t);
211 data->owner_device = 0;
216 copy->construct(std::move(
ptr), size);
217 parsec_data_copy_attach(data, copy, 0);
220 data->device_copies[0]->flags |= PARSEC_DATA_FLAG_PARSEC_MANAGED;
222 data->device_copies[0]->coherency_state = PARSEC_DATA_COHERENCY_EXCLUSIVE;
223 data->device_copies[0]->version = 1;
225 data->device_copies[0]->coherency_state = PARSEC_DATA_COHERENCY_INVALID;
227 data->device_copies[0]->version = 0;
246template<
typename T,
typename Allocator>
255 static_assert(std::is_trivially_copyable_v<element_type>,
256 "Only trivially copyable types are supported for devices.");
259 parsec_data_t *m_data =
nullptr;
260 std::size_t m_count = 0;
265 void release_data() {
266 if (
nullptr == m_data)
return;
269 parsec_data_discard(m_data);
279 : m_data(detail::ttg_parsec_data_types<
std::shared_ptr<
value_type[]>, Allocator>::create_data(n,
scope))
290 : m_data(detail::ttg_parsec_data_types<
std::shared_ptr<
value_type[]>,
296 template<
typename Deleter>
299 : m_data(detail::ttg_parsec_data_types<
std::unique_ptr<
value_type[], Deleter>,
313 , m_count(db.m_count)
326 std::swap(m_data, db.m_data);
327 std::swap(m_count, db.m_count);
348 assert(parsec_nb_devices > parsec_id);
350 assert(m_data->device_copies[parsec_id] !=
nullptr);
351 m_data->owner_device = parsec_id;
352 m_data->device_copies[parsec_id]->version = m_data->device_copies[0]->version;
353 m_data->device_copies[parsec_id]->coherency_state = PARSEC_DATA_COHERENCY_EXCLUSIVE;
357 if (
empty())
return true;
359 uint32_t max_version = 0;
360 for (
int i = 0; i < parsec_nb_devices; ++i) {
361 if (
nullptr == m_data->device_copies[i])
continue;
362 max_version = std::max(max_version, m_data->device_copies[i]->version);
364 return (m_data->device_copies[parsec_id] &&
365 m_data->device_copies[parsec_id]->version == max_version);
382 if (
empty())
return nullptr;
384 return static_cast<pointer_type>(m_data->device_copies[device_id]->device_private);
390 if (
empty())
return nullptr;
392 return static_cast<const_pointer_type>(m_data->device_copies[device_id]->device_private);
400 if (
empty())
return nullptr;
401 return static_cast<pointer_type>(m_data->device_copies[m_data->owner_device]->device_private);
407 if (
empty())
return nullptr;
408 return static_cast<const_pointer_type>(m_data->device_copies[m_data->owner_device]->device_private);
415 if (
empty())
return nullptr;
417 return static_cast<pointer_type>(parsec_data_get_ptr(m_data, device_id));
424 if (
empty())
return nullptr;
430 return static_cast<pointer_type>(parsec_data_get_ptr(m_data, 0));
440 return (parsec_data_get_ptr(m_data, device_id) !=
nullptr);
445 if (!m_data)
throw std::runtime_error(
"Cannot allocate on an empty buffer!");
447 assert(parsec_nb_devices > parsec_id);
448 assert(m_data !=
nullptr);
449 if (m_data->device_copies[parsec_id] ==
nullptr || m_data->device_copies[parsec_id]->device_private ==
nullptr) {
452 parsec_device_gpu_module_t *device_module = (parsec_device_gpu_module_t*)parsec_mca_device_get(parsec_id);
454 T*
ptr = (T*)zone_malloc(device_module->memory, m_count*
sizeof(T));
455 if (
nullptr ==
ptr) {
456 throw std::bad_alloc{};
458 parsec_data_copy_t* copy =
nullptr;
459 if (m_data->device_copies[parsec_id] ==
nullptr) {
461 copy = parsec_data_copy_new(m_data, parsec_id, parsec_datatype_int8_t,
462 PARSEC_DATA_FLAG_PARSEC_MANAGED | PARSEC_DATA_FLAG_PARSEC_OWNED);
464 copy = m_data->device_copies[parsec_id];
466 copy->coherency_state = PARSEC_DATA_COHERENCY_INVALID;
467 copy->device_private =
ptr;
468 m_data->device_copies[parsec_id] = copy;
472 m_data->device_copies[parsec_id]->version = 0;
482 for (
int i = 1; i < parsec_nb_devices; ++i) {
506 return (m_count == 0 || m_data);
509 operator bool()
const {
554 m_data->device_copies[0]->version = 0;
556 m_data->device_copies[0]->version = 1;
558 for (
int i = 0; i < parsec_nb_devices; ++i) {
559 if (m_data->device_copies[i] !=
nullptr) {
560 m_data->device_copies[i]->version = 0;
564 m_data->owner_device = 0;
570 return (m_data->device_copies[0]->version == 0)
576 if (dev.
is_device() && m_data->owner_device == 0) {
578 PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE);
584 throw std::runtime_error(
"Unable to add device that has already a buffer set!");
595#ifdef TTG_SERIALIZATION_SUPPORTS_BOOST
596 template <
typename Archive>
597 void serialize(Archive& ar,
const unsigned int version) {
598 if constexpr (ttg::detail::is_output_archive_v<Archive>) {
599 std::size_t s =
size();
610#ifdef TTG_SERIALIZATION_SUPPORTS_MADNESS
611 template <
typename Archive>
612 std::enable_if_t<std::is_base_of_v<madness::archive::BufferInputArchive, Archive> ||
613 std::is_base_of_v<madness::archive::BufferOutputArchive, Archive>>
614 serialize(Archive& ar) {
615 if constexpr (ttg::detail::is_output_archive_v<Archive>) {
616 std::size_t s =
size();
622 if (m_data !=
nullptr) {
624 throw std::runtime_error(
"Buffer size mismatch in deserialization!");
637 template<
typename T,
typename A>
639 return const_cast<parsec_data_t*
>(db.m_data);
Represents a device in a specific execution space.
constexpr ttg::ExecutionSpace available_execution_space
ttg::device::Device parsec_device_to_ttg_device(int parsec_id)
int ttg_device_to_parsec_device(const ttg::device::Device &device)
parsec_data_t * get_parsec_data(const ttg_parsec::Buffer< T, A > &db)
This contains PaRSEC-based TTG functionality.
std::decay_t< T > element_type
std::add_pointer_t< value_type > pointer_type
const_pointer_type device_ptr_on(const ttg::device::Device &device) const
void prefer_device(ttg::device::Device dev)
const_pointer_type owner_device_ptr() const
void reset(std::shared_ptr< value_type[]> ptr, std::size_t n, ttg::scope scope=ttg::scope::SyncIn)
Buffer & operator=(const Buffer &db)=delete
bool is_valid_on(const ttg::device::Device &device) const
Buffer(const Buffer &db)=delete
ttg::device::Device get_owner_device() const
void reset_scope(ttg::scope scope)
pointer_type device_ptr_on(const ttg::device::Device &device)
const_pointer_type host_ptr() const
Buffer(std::unique_ptr< value_type[], Deleter > ptr, std::size_t n, ttg::scope scope=ttg::scope::SyncIn)
const_pointer_type current_device_ptr() const
Buffer & operator=(Buffer &&db)
void add_device(ttg::device::Device dev, pointer_type ptr, bool is_current=false)
void pin_on(int device_id)
Buffer(std::shared_ptr< value_type[]> ptr, std::size_t n, ttg::scope scope=ttg::scope::SyncIn)
bool is_current_on(ttg::device::Device dev) const
std::remove_all_extents_t< T > value_type
void allocate_on(const ttg::device::Device &device)
pointer_type owner_device_ptr()
void set_owner_device(const ttg::device::Device &device)
const std::remove_const_t< value_type > * const_pointer_type
Buffer(std::size_t n, ttg::scope scope=ttg::scope::SyncIn)
pointer_type current_device_ptr()
void reset(std::size_t n, ttg::scope scope=ttg::scope::SyncIn)
void unpin_on(int device_id)
std::decay_t< T > value_type
value_type * allocate(std::size_t size)
void deallocate(value_type *ptr, std::size_t size)
data_copy_type & operator=(data_copy_type &&)=default
void construct(Ptr &&ptr, std::size_t size)
void construct(std::size_t size, ttg::scope scope, AllocatorT &&alloc=AllocatorT())
data_copy_type & operator=(const data_copy_type &)=delete
data_copy_type(const data_copy_type &)=delete
data_copy_type(data_copy_type &&)=default
static parsec_data_t * create_data(PtrT &ptr, std::size_t size, ttg::scope scope)
static parsec_data_t * create_data(std::size_t size, ttg::scope scope, const allocator_type &allocator=allocator_type())
static PARSEC_OBJ_CLASS_INSTANCE(data_copy_type, parsec_data_copy_t, data_copy_construct, data_copy_destruct)
static void data_copy_destruct(data_copy_type *obj)
static constexpr bool always_allocate_on_host
typename allocator_traits::value_type value_type
static void data_copy_construct(data_copy_type *obj)
typename allocator_traits::allocator_type allocator_type
std::allocator_traits< Allocator > allocator_traits