buffer.h
Go to the documentation of this file.
1 #ifndef TTG_PARSEC_BUFFER_H
2 #define TTG_PARSEC_BUFFER_H
3 
4 #include <array>
5 #include <vector>
6 #include <cassert>
7 #include <parsec.h>
8 #include <parsec/data_internal.h>
9 #include <parsec/mca/device/device.h>
11 #include "ttg/parsec/parsec-ext.h"
12 #include "ttg/util/iovec.h"
13 #include "ttg/device/device.h"
14 #include "ttg/parsec/device.h"
15 #include "ttg/devicescope.h"
16 
17 #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
18 #include <cuda_runtime.h>
19 #endif // PARSEC_HAVE_DEV_CUDA_SUPPORT
20 
21 namespace ttg_parsec {
22 
23 
24 namespace detail {
25  // fwd decl
26  template<typename T, typename A>
27  parsec_data_t* get_parsec_data(const ttg_parsec::Buffer<T, A>& db);
28 
/**
 * Placeholder allocator for memory that is owned elsewhere.
 * It never hands out memory: allocate() always throws and
 * deallocate() is a no-op.
 */
template<typename T>
struct empty_allocator {

  using value_type = typename std::decay<T>::type;

  /* Asking this allocator for memory is always an error. */
  value_type* allocate(std::size_t size) {
    static_cast<void>(size); // intentionally unused
    throw std::runtime_error("Allocate on empty allocator!");
  }

  /* Nothing to release; this gets called from ~data_copy_type(). */
  void deallocate(value_type* ptr, std::size_t size) {
    static_cast<void>(ptr);  // intentionally unused
    static_cast<void>(size); // intentionally unused
  }
};
42 
/* Obtain the raw address behind a plain pointer or a smart pointer. */

/* A plain pointer already is the address and is returned unchanged. */
template<typename Elem>
inline Elem* to_address(Elem* ptr) {
  Elem* const raw = ptr;
  return raw;
}

/* Anything else is expected to offer get(), like the standard smart pointers. */
template<typename SmartPtr>
inline auto to_address(SmartPtr&& ptr) {
  auto raw = ptr.get();
  return raw;
}
53 
58  template<typename PtrT, typename Allocator>
60  using allocator_traits = std::allocator_traits<Allocator>;
61  using allocator_type = typename allocator_traits::allocator_type;
62  using value_type = typename allocator_traits::value_type;
63 
64  /* used as a hook into the PaRSEC object management system
65  * so we can release the memory back to the allocator once
66  * data copy is destroyed */
67  struct data_copy_type : public parsec_data_copy_t
68  {
69  private:
70  [[no_unique_address]]
71  allocator_type m_allocator;
72  PtrT m_ptr; // keep a reference if PtrT is a shared_ptr
73  std::size_t m_size;
74 
75  void allocate(std::size_t size) {
76  if constexpr (std::is_pointer_v<PtrT>) {
77  m_ptr = allocator_traits::allocate(m_allocator, size);
78  }
79  this->device_private = m_ptr;
80  m_size = size;
81  }
82 
83  void deallocate() {
84  allocator_traits::deallocate(m_allocator, static_cast<value_type*>(this->device_private), this->m_size);
85  this->device_private = nullptr;
86  this->m_size = 0;
87  }
88 
89  public:
90 
91  /* default construction and move, but not copy */
92  data_copy_type() = default;
93  data_copy_type(const data_copy_type&) = delete;
97 
98  void construct(PtrT ptr, std::size_t size) {
99  m_allocator = allocator_type{};
100  constexpr const bool is_empty_allocator = std::is_same_v<Allocator, empty_allocator<value_type>>;
101  assert(is_empty_allocator);
102  m_ptr = std::move(ptr);
103  this->device_private = const_cast<value_type*>(to_address(m_ptr));
104  }
105 
106  void construct(std::size_t size,
107  const allocator_type& alloc = allocator_type()) {
108  constexpr const bool is_empty_allocator = std::is_same_v<Allocator, empty_allocator<value_type>>;
109  assert(!is_empty_allocator);
110  m_allocator = alloc;
111  allocate(size);
112  this->device_private = m_ptr;
113  }
114 
116  this->deallocate();
117  }
118  };
119 
124  {
125  /* placement new */
126  new(obj)(data_copy_type);
127  }
128 
130  {
131  obj->~data_copy_type(); // call destructor
132  }
133 
134  inline static PARSEC_OBJ_CLASS_INSTANCE(data_copy_type, parsec_data_copy_t,
137 
138  static parsec_data_t * create_data(std::size_t size, ttg::scope scope,
139  const allocator_type& allocator = allocator_type()) {
140  parsec_data_t *data = PARSEC_OBJ_NEW(parsec_data_t);
141  data->owner_device = 0;
142  data->nb_elts = size*sizeof(value_type);
143 
144  /* create the host copy and allocate host memory */
145  data_copy_type *copy = PARSEC_OBJ_NEW(data_copy_type);
146  copy->construct(size, allocator);
147  parsec_data_copy_attach(data, copy, 0);
148 
149  /* adjust data flags */
150  data->device_copies[0]->flags |= PARSEC_DATA_FLAG_PARSEC_MANAGED;
151  data->device_copies[0]->coherency_state = PARSEC_DATA_COHERENCY_SHARED;
152  /* setting version to 0 causes data not to be sent to the device */
153  data->device_copies[0]->version = (scope == ttg::scope::SyncIn) ? 1 : 0;
154 
155  return data;
156  }
157 
158  static parsec_data_t * create_data(PtrT& ptr, std::size_t size, ttg::scope scope) {
159  parsec_data_t *data = PARSEC_OBJ_NEW(parsec_data_t);
160  data->owner_device = 0;
161  data->nb_elts = size*sizeof(value_type);
162 
163  /* create the host copy and allocate host memory */
164  data_copy_type *copy = PARSEC_OBJ_NEW(data_copy_type);
165  copy->construct(std::move(ptr), size);
166  parsec_data_copy_attach(data, copy, 0);
167 
168  /* adjust data flags */
169  data->device_copies[0]->flags |= PARSEC_DATA_FLAG_PARSEC_MANAGED;
170  data->device_copies[0]->coherency_state = PARSEC_DATA_COHERENCY_SHARED;
171  /* setting version to 0 causes data not to be sent to the device */
172  data->device_copies[0]->version = (scope == ttg::scope::SyncIn) ? 1 : 0;
173 
174  return data;
175  }
176  };
177 } // namespace detail
178 
190 template<typename T, typename Allocator>
191 struct Buffer {
192 
193  /* TODO: add overloads for T[]? */
194  using value_type = std::remove_all_extents_t<T>;
195  using pointer_type = std::add_pointer_t<value_type>;
196  using const_pointer_type = const std::remove_const_t<value_type>*;
197  using element_type = std::decay_t<T>;
198 
199  static_assert(std::is_trivially_copyable_v<element_type>,
200  "Only trivially copyable types are supported for devices.");
201 
202 private:
203  parsec_data_t *m_data = nullptr;
204  std::size_t m_count = 0;
205 
206  friend parsec_data_t* detail::get_parsec_data<T>(const ttg_parsec::Buffer<T, Allocator>&);
207 
208 
209  void release_data() {
210  if (nullptr == m_data) return;
211  /* discard the parsec data so it can be collected by the runtime
212  * and the buffer be free'd in the parsec_data_copy_t destructor */
213  parsec_data_discard(m_data);
214  /* set data to null so we don't repeat the above */
215  m_data = nullptr;
216  }
217 
218 public:
219 
220  Buffer() = default;
221 
223  : m_data(detail::ttg_parsec_data_types<T*, Allocator>::create_data(n, scope))
224  , m_count(n)
225  { }
226 
232  Buffer(std::shared_ptr<value_type[]> ptr, std::size_t n,
234  : m_data(detail::ttg_parsec_data_types<std::shared_ptr<value_type[]>,
235  detail::empty_allocator<element_type>>
236  ::create_data(ptr, n, scope))
237  , m_count(n)
238  { }
239 
240  template<typename Deleter>
241  Buffer(std::unique_ptr<value_type[], Deleter> ptr, std::size_t n,
243  : m_data(detail::ttg_parsec_data_types<std::unique_ptr<value_type[], Deleter>,
244  detail::empty_allocator<element_type>>
245  ::create_data(ptr, n, scope))
246  , m_count(n)
247  { }
248 
249  virtual ~Buffer() {
250  unpin(); // make sure the copies are not pinned
251  release_data();
252  }
253 
254  /* allow moving device buffers */
256  : m_data(db.m_data)
257  , m_count(db.m_count)
258  {
259  db.m_data = nullptr;
260  db.m_count = 0;
261  }
262 
263  /* explicitly disable copying of buffers
264  * TODO: should we allow this? What data to use?
265  */
266  Buffer(const Buffer& db) = delete;
267 
268  /* allow moving device buffers */
270  std::swap(m_data, db.m_data);
271  std::swap(m_count, db.m_count);
272  //std::cout << "buffer " << this << " other " << &db << " mv op ttg_copy " << m_ttg_copy << std::endl;
273  //std::cout << "buffer::move-assign from " << &db << " ttg-copy " << db.m_ttg_copy
274  // << " to " << this << " ttg-copy " << m_ttg_copy
275  // << " parsec-data " << m_data.get()
276  // << std::endl;
277  /* don't update the ttg_copy, we keep the connection */
278  return *this;
279  }
280 
281  /* explicitly disable copying of buffers
282  * TODO: should we allow this? What data to use?
283  */
284  Buffer& operator=(const Buffer& db) = delete;
285 
286  /* set the current device, useful when a device
287  * buffer was modified outside of a TTG */
289  assert(is_valid());
290  int parsec_id = detail::ttg_device_to_parsec_device(device);
291  /* make sure it's a valid device */
292  assert(parsec_nb_devices > parsec_id);
293  /* make sure it's a valid copy */
294  assert(m_data->device_copies[parsec_id] != nullptr);
295  m_data->owner_device = parsec_id;
296  }
297 
299  if (empty()) return true; // empty is current everywhere
300  int parsec_id = detail::ttg_device_to_parsec_device(dev);
301  uint32_t max_version = 0;
302  for (int i = 0; i < parsec_nb_devices; ++i) {
303  if (nullptr == m_data->device_copies[i]) continue;
304  max_version = std::max(max_version, m_data->device_copies[i]->version);
305  }
306  return (m_data->device_copies[parsec_id] &&
307  m_data->device_copies[parsec_id]->version == max_version);
308  }
309 
310  /* Get the owner device ID, i.e., the last updated
311  * device buffer.
312  * NOTE: there may be more than one device with the current
313  * data so the result may not always be what is expected.
314  * Use is_current_on() to check for a specific device. */
316  assert(is_valid());
317  if (empty()) return ttg::device::current_device(); // empty is always valid
318  return detail::parsec_device_to_ttg_device(m_data->owner_device);
319  }
320 
321  /* Get the pointer on the currently active device. */
323  assert(is_valid());
324  if (empty()) return nullptr;
326  return static_cast<pointer_type>(m_data->device_copies[device_id]->device_private);
327  }
328 
329  /* Get the pointer on the currently active device. */
331  assert(is_valid());
332  if (empty()) return nullptr;
334  return static_cast<const_pointer_type>(m_data->device_copies[device_id]->device_private);
335  }
336 
337  /* Get the pointer on the owning device.
338  * @note: This may not be the device assigned to the currently executing task.
339  * See \ref ttg::device::current_device for that. */
341  assert(is_valid());
342  if (empty()) return nullptr;
343  return static_cast<pointer_type>(m_data->device_copies[m_data->owner_device]->device_private);
344  }
345 
346  /* get the current device pointer */
348  assert(is_valid());
349  if (empty()) return nullptr;
350  return static_cast<const_pointer_type>(m_data->device_copies[m_data->owner_device]->device_private);
351  }
352 
353  /* get the device pointer at the given device
354  */
356  assert(is_valid());
357  if (empty()) return nullptr;
358  int device_id = detail::ttg_device_to_parsec_device(device);
359  return static_cast<pointer_type>(parsec_data_get_ptr(m_data, device_id));
360  }
361 
362  /* get the device pointer at the given device
363  */
365  assert(is_valid());
366  if (empty()) return nullptr;
367  int device_id = detail::ttg_device_to_parsec_device(device);
368  return static_cast<const_pointer_type>(parsec_data_get_ptr(m_data, device_id));
369  }
370 
372  return static_cast<pointer_type>(parsec_data_get_ptr(m_data, 0));
373  }
374 
376  return static_cast<const_pointer_type>(parsec_data_get_ptr(m_data, 0));
377  }
378 
379  bool is_valid_on(const ttg::device::Device& device) const {
380  assert(is_valid());
381  int device_id = detail::ttg_device_to_parsec_device(device);
382  return (parsec_data_get_ptr(m_data, device_id) != nullptr);
383  }
384 
385  void allocate_on(const ttg::device::Device& device_id) {
386  /* TODO: need exposed PaRSEC memory allocator */
387  throw std::runtime_error("not implemented yet");
388  }
389 
390  /* TODO: can we do this automatically?
391  * Pin the memory on all devices we currently track.
392  * Pinned memory won't be released by PaRSEC and can be used
393  * at any time.
394  */
395  void pin() {
396  for (int i = 1; i < parsec_nb_devices; ++i) {
397  pin_on(i);
398  }
399  }
400 
401  /* Unpin the memory on all devices we currently track. */
402  void unpin() {
403  if (!is_valid()) return;
404  for (int i = 0; i < parsec_nb_devices-detail::first_device_id; ++i) {
405  unpin_on(i);
406  }
407  }
408 
409  /* Pin the memory on a given device */
410  void pin_on(int device_id) {
411  /* TODO: how can we pin memory on a device? */
412  }
413 
414  /* Pin the memory on a given device */
415  void unpin_on(int device_id) {
416  /* TODO: how can we unpin memory on a device? */
417  }
418 
419  bool is_valid() const {
420  return (m_count == 0 || m_data);
421  }
422 
423  operator bool() const {
424  return !empty();
425  }
426 
427  std::size_t size() const {
428  return m_count;
429  }
430 
431  bool empty() const {
432  return m_count == 0;
433  }
434 
435  /* Reallocate the buffer with count elements */
436  void reset(std::size_t n, ttg::scope scope = ttg::scope::SyncIn) {
437  if (n == m_count) return;
438  release_data();
440  m_count = n;
441  }
442 
443  /* Reallocate the buffer with count elements */
444  void reset(std::shared_ptr<value_type[]> ptr, std::size_t n, ttg::scope scope = ttg::scope::SyncIn) {
445  release_data();
448  ::create_data(ptr, n, scope);
449  m_count = n;
450  }
451 
460  if (scope == ttg::scope::Allocate) {
461  m_data->device_copies[0]->version = 0;
462  } else {
463  m_data->device_copies[0]->version = 1;
464  /* reset all other copies to force a sync-in */
465  for (int i = 0; i < parsec_nb_devices; ++i) {
466  if (m_data->device_copies[i] != nullptr) {
467  m_data->device_copies[i]->version = 0;
468  }
469  }
470  }
471  m_data->owner_device = 0;
472  }
473 
474  ttg::scope scope() const {
475  /* if the host owns the data and has a version of zero we only have to allocate data */
476  if (nullptr != m_data) return ttg::scope::Invalid;
477  return (m_data->device_copies[0]->version == 0 && m_data->owner_device == 0)
479  }
480 
482  /* only set device if the host has the latest copy as otherwise we might end up with a stale copy */
483  if (dev.is_device() && m_data->owner_device == 0) {
484  parsec_advise_data_on_device(m_data, detail::ttg_device_to_parsec_device(dev),
485  PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE);
486  }
487  }
488 
489  void add_device(ttg::device::Device dev, pointer_type ptr, bool is_current = false) {
490  if (is_valid_on(dev)) {
491  throw std::runtime_error("Unable to add device that has already a buffer set!");
492  }
494  if (is_current) {
495  // mark the data as being current on the new device
496  m_data->owner_device = detail::ttg_device_to_parsec_device(dev);
497  }
498  }
499 
500  /* serialization support */
501 
502 #ifdef TTG_SERIALIZATION_SUPPORTS_BOOST
503  template <typename Archive>
504  void serialize(Archive& ar, const unsigned int version) {
505  if constexpr (ttg::detail::is_output_archive_v<Archive>) {
506  std::size_t s = size();
507  ar& s;
508  } else {
509  std::size_t s;
510  ar & s;
511  /* initialize internal pointers and then reset */
512  reset(s);
513  }
514  }
515 #endif // TTG_SERIALIZATION_SUPPORTS_BOOST
516 
517 #ifdef TTG_SERIALIZATION_SUPPORTS_MADNESS
518  template <typename Archive>
519  std::enable_if_t<std::is_base_of_v<madness::archive::BufferInputArchive, Archive> ||
520  std::is_base_of_v<madness::archive::BufferOutputArchive, Archive>>
521  serialize(Archive& ar) {
522  if constexpr (ttg::detail::is_output_archive_v<Archive>) {
523  std::size_t s = size();
524  ar& s;
525  } else {
526  std::size_t s;
527  ar & s;
528  /* if we have been initialized already we just make sure the size matches */
529  if (m_data != nullptr) {
530  if (s != size()) {
531  throw std::runtime_error("Buffer size mismatch in deserialization!");
532  }
533  } else {
534  //std::cout << "serialize(IN) buffer " << this << " size " << s << std::endl;
535  /* initialize internal pointers and then reset */
536  reset(s);
537  }
538  }
539  }
540 #endif // TTG_SERIALIZATION_SUPPORTS_MADNESS
541 };
542 
543 namespace detail {
544  template<typename T, typename A>
545  parsec_data_t* get_parsec_data(const ttg_parsec::Buffer<T, A>& db) {
546  return const_cast<parsec_data_t*>(db.m_data);
547  }
548 } // namespace detail
549 
550 } // namespace ttg_parsec
551 
552 #endif // TTG_PARSEC_BUFFER_H
Represents a device in a specific execution space.
Definition: device.h:14
bool is_device() const
Definition: device.h:43
constexpr auto data(C &c) -> decltype(c.data())
Definition: span.h:189
Device current_device()
Definition: device.h:135
T * to_address(T *ptr)
Definition: buffer.h:45
ttg::device::Device parsec_device_to_ttg_device(int parsec_id)
Definition: device.h:30
int first_device_id
Definition: device.h:12
int ttg_device_to_parsec_device(const ttg::device::Device &device)
Definition: device.h:18
parsec_data_t * get_parsec_data(const ttg_parsec::Buffer< T, A > &db)
Definition: buffer.h:545
this contains PaRSEC-based TTG functionality
Definition: fwd.h:18
scope
Definition: devicescope.h:5
int size(World world=default_execution_context())
Definition: run.h:89
std::array< int, 3 > version()
Definition: version.cc:4
std::decay_t< T > element_type
Definition: buffer.h:197
Buffer(Buffer &&db)
Definition: buffer.h:255
std::add_pointer_t< value_type > pointer_type
Definition: buffer.h:195
const_pointer_type device_ptr_on(const ttg::device::Device &device) const
Definition: buffer.h:364
void prefer_device(ttg::device::Device dev)
Definition: buffer.h:481
void set_current_device(const ttg::device::Device &device)
Definition: buffer.h:288
std::size_t size() const
Definition: buffer.h:427
const_pointer_type owner_device_ptr() const
Definition: buffer.h:347
bool is_valid() const
Definition: buffer.h:419
void reset(std::shared_ptr< value_type[]> ptr, std::size_t n, ttg::scope scope=ttg::scope::SyncIn)
Definition: buffer.h:444
virtual ~Buffer()
Definition: buffer.h:249
bool is_valid_on(const ttg::device::Device &device) const
Definition: buffer.h:379
pointer_type host_ptr()
Definition: buffer.h:371
Buffer(const Buffer &db)=delete
ttg::device::Device get_owner_device() const
Definition: buffer.h:315
void reset_scope(ttg::scope scope)
Definition: buffer.h:459
pointer_type device_ptr_on(const ttg::device::Device &device)
Definition: buffer.h:355
Buffer & operator=(const Buffer &db)=delete
const_pointer_type host_ptr() const
Definition: buffer.h:375
Buffer(std::unique_ptr< value_type[], Deleter > ptr, std::size_t n, ttg::scope scope=ttg::scope::SyncIn)
Definition: buffer.h:241
const_pointer_type current_device_ptr() const
Definition: buffer.h:330
void add_device(ttg::device::Device dev, pointer_type ptr, bool is_current=false)
Definition: buffer.h:489
void pin_on(int device_id)
Definition: buffer.h:410
Buffer(std::shared_ptr< value_type[]> ptr, std::size_t n, ttg::scope scope=ttg::scope::SyncIn)
Definition: buffer.h:232
bool is_current_on(ttg::device::Device dev) const
Definition: buffer.h:298
std::remove_all_extents_t< T > value_type
Definition: buffer.h:194
void allocate_on(const ttg::device::Device &device_id)
Definition: buffer.h:385
ttg::scope scope() const
Definition: buffer.h:474
bool empty() const
Definition: buffer.h:431
pointer_type owner_device_ptr()
Definition: buffer.h:340
const std::remove_const_t< value_type > * const_pointer_type
Definition: buffer.h:196
Buffer(std::size_t n, ttg::scope scope=ttg::scope::SyncIn)
Definition: buffer.h:222
pointer_type current_device_ptr()
Definition: buffer.h:322
void reset(std::size_t n, ttg::scope scope=ttg::scope::SyncIn)
Definition: buffer.h:436
void unpin_on(int device_id)
Definition: buffer.h:415
Buffer & operator=(Buffer &&db)
Definition: buffer.h:269
value_type * allocate(std::size_t size)
Definition: buffer.h:34
std::decay_t< T > value_type
Definition: buffer.h:32
void deallocate(value_type *ptr, std::size_t size)
Definition: buffer.h:38
void construct(PtrT ptr, std::size_t size)
Definition: buffer.h:98
data_copy_type & operator=(const data_copy_type &)=delete
void construct(std::size_t size, const allocator_type &alloc=allocator_type())
Definition: buffer.h:106
data_copy_type & operator=(data_copy_type &&)=default
static PARSEC_OBJ_CLASS_INSTANCE(data_copy_type, parsec_data_copy_t, data_copy_construct, data_copy_destruct)
static void data_copy_destruct(data_copy_type *obj)
Definition: buffer.h:129
static parsec_data_t * create_data(PtrT &ptr, std::size_t size, ttg::scope scope)
Definition: buffer.h:158
typename allocator_traits::value_type value_type
Definition: buffer.h:62
static parsec_data_t * create_data(std::size_t size, ttg::scope scope, const allocator_type &allocator=allocator_type())
Definition: buffer.h:138
static void data_copy_construct(data_copy_type *obj)
Definition: buffer.h:123
typename allocator_traits::allocator_type allocator_type
Definition: buffer.h:61
std::allocator_traits< Allocator > allocator_traits
Definition: buffer.h:60