Skip to content

Commit

Permalink
Add makeAvailable
Browse files Browse the repository at this point in the history
Adds an overload set `makeAvailable` that only copies a buffer if the destination device requires the buffer in a different memory space. Otherwise, no copy is performed and just the handles are adjusted.

Fixes: #28
  • Loading branch information
bernhardmgruber committed Jan 25, 2023
1 parent 1482dc4 commit 745c3c5
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 0 deletions.
10 changes: 10 additions & 0 deletions docs/source/basic/cheatsheet.rst
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,16 @@ Enqueue a memory copy from device to host

memcpy(queue, bufHost, bufDevice, extent);

Makes the memory of bufA available on dev via bufB. If possible, no copy is performed and bufA and bufB share the same memory.
.. code-block:: c++

auto bufB = makeAvailable(queue, dev, bufA);

Makes the memory of bufB available on the device of bufA, as bufA. If possible, no copy is performed and bufA and bufB share the same memory.
.. code-block:: c++

makeAvailable(queue, bufA, bufB);

.. raw:: pdf
PageBreak
Expand Down
78 changes: 78 additions & 0 deletions include/alpaka/mem/buf/Traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,4 +191,82 @@ namespace alpaka

ALPAKA_UNREACHABLE(allocBuf<TElem, TIdx>(host, extent));
}

namespace detail
{
    //! Decides whether a view living on \p devSrc can be used on \p devDst without copying.
    //!
    //! \return true only if both devices have the same type and compare equal, false otherwise.
    // TODO(bgruber): very crude
    template<typename DevDst, typename DevSrc>
    auto canZeroCopy(DevDst const& devDst, DevSrc const& devSrc) -> bool
    {
        if constexpr(!std::is_same_v<DevDst, DevSrc>)
        {
            // Devices of different types are never compared; they are treated as distinct.
            return false;
        }
        else
        {
            return static_cast<bool>(devSrc == devDst);
        }
    }
} // namespace detail

//! Makes the content of the source view available through the destination view.
//!
//! If both views have the same type and detail::canZeroCopy reports that their devices allow it, no data is
//! copied: the source view is assigned to the destination view instead. In every other case a memcpy from the
//! source view to the destination view is enqueued into \p queue.
//!
//! \param queue The queue used for the memcpy, if one is needed.
//! \param viewDst The destination view; overwritten by assignment on the zero-copy path.
//! \param viewSrc The source view.
template<typename TQueue, typename TViewDst, typename TViewSrc>
ALPAKA_FN_HOST void makeAvailable(TQueue& queue, TViewDst& viewDst, TViewSrc const& viewSrc)
{
    ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

    // TODO(bgruber): lift this by converting buffer types
    constexpr bool viewTypesMatch = std::is_same_v<TViewSrc, TViewDst>;
    if constexpr(viewTypesMatch)
    {
        bool const copyCanBeElided = detail::canZeroCopy(getDev(viewDst), getDev(viewSrc));
        if(copyCanBeElided)
        {
#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
            std::cout << "zero_memcopy: copy elided\n";
#endif
            // Rebind the destination to the source instead of copying the data.
            viewDst = viewSrc;
            return;
        }
    }

#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
    std::cout << "zero_memcopy: deep copy required\n";
#endif
    memcpy(queue, viewDst, viewSrc);
}

//! Makes the content of the source view available on the destination device and returns a view to it.
//!
//! If the source view already has the destination view type and detail::canZeroCopy reports that the devices
//! allow it, a copy of the source view handle is returned and no data is copied. Otherwise a new buffer with the
//! extent of the source view is allocated for the destination device, the content of the source view is copied
//! into it via \p queue, and the new buffer is returned.
//!
//! \param queue The queue used for the memcpy and, if its device is the destination device, for the allocation.
//! \param dstDev The device on which the data should become available.
//! \param viewSrc The source view.
//! \return A view of type TViewDst holding the content of the source view.
template<
    typename TQueue,
    typename TDevDst,
    typename TViewSrc,
    std::enable_if_t<isDevice<TDevDst>, int> = 0,
    typename TViewDst = Buf<TDevDst, Elem<TViewSrc>, Dim<TViewSrc>, Idx<TViewSrc>>>
ALPAKA_FN_HOST auto makeAvailable(TQueue& queue, TDevDst const& dstDev, TViewSrc const& viewSrc) -> TViewDst
{
    ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

    // TODO(bgruber): lift this by converting buffer types
    if constexpr(std::is_same_v<TViewSrc, TViewDst>)
    {
        if(detail::canZeroCopy(dstDev, getDev(viewSrc)))
        {
#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
            std::cout << "zero_memcopy: shallow copy returned\n";
#endif
            return viewSrc;
        }
    }

    using TElem = Elem<TViewSrc>;
    using TIdx = Idx<TViewSrc>;
    auto const extent = getExtentVec(viewSrc);

    // Allocate through the queue when the queue belongs to the destination device, otherwise directly on the
    // destination device.
    auto const allocateDestination = [&]
    {
        if constexpr(std::is_same_v<Dev<TQueue>, TDevDst>)
        {
            if(getDev(queue) == dstDev)
                return allocAsyncBufIfSupported<TElem, TIdx>(queue, extent);
        }
        return allocBuf<TElem, TIdx>(dstDev, extent);
    };

    TViewDst dst = allocateDestination();
    memcpy(queue, dst, viewSrc);
#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
    std::cout << "zero_memcopy: deep copy returned\n";
#endif
    return dst;
}
} // namespace alpaka
52 changes: 52 additions & 0 deletions test/unit/mem/buf/src/BufTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,3 +325,55 @@ TEMPLATE_LIST_TEST_CASE("memBufMove", "[memBuf]", alpaka::test::TestAccs)
CHECK(read(buf2) == 1);
} // both buffers destruct fine here
}


// Exercises both makeAvailable overloads: host -> accelerator device, then back to the host, once into an
// existing buffer and once into a newly returned buffer. When the accelerator device is the CPU device, the
// returned buffers are additionally expected to share their memory with the source buffer.
TEMPLATE_LIST_TEST_CASE("Zerocopy", "[memBuf]", alpaka::test::TestAccs)
{
    using Acc = TestType;
    using Dim = alpaka::Dim<Acc>;
    using Idx = alpaka::Idx<Acc>;
    using Dev = alpaka::Dev<Acc>;
    using Queue = alpaka::test::DefaultQueue<Dev>;
    using Elem = int;

    // The pointer-identity checks below only apply when the accelerator device type is the CPU device type.
    constexpr auto accIsHostDev = std::is_same_v<Dev, alpaka::DevCpu>;

    auto const extent
        = alpaka::createVecFromIndexedFn<Dim, alpaka::test::CreateVecWithIdx<Idx>::template ForExtentBuf>();
    auto const hostDev = alpaka::getDevByIdx<alpaka::PltfCpu>(0);
    auto const accDev = alpaka::getDevByIdx<alpaka::Pltf<Dev>>(0);
    auto queue = Queue(accDev);

    // create and fill host buffer
    auto hostBuf = alpaka::allocBuf<Elem, Idx>(hostDev, extent);
    alpaka::test::iotaFillView(queue, hostBuf);
    {
        INFO("hostBuf initially");
        alpaka::test::iotaCheckView(queue, hostBuf);
    }

    // zero-copy to device, check it there
    auto devBuf = alpaka::makeAvailable(queue, accDev, hostBuf);
    if constexpr(accIsHostDev)
        CHECK(alpaka::getPtrNative(devBuf) == alpaka::getPtrNative(hostBuf));
    {
        INFO("devBuf");
        alpaka::test::iotaCheckView(queue, devBuf);
    }

    // case 1: zero-copy back to host into existing buffer, check it there
    {
        alpaka::makeAvailable(queue, hostBuf, devBuf);
        INFO("hostBuf after copying back");
        alpaka::test::iotaCheckView(queue, hostBuf);
    }

    // case 2: zero-copy back to host into new buffer, check it there
    {
        auto dstHostBuf = alpaka::makeAvailable(queue, hostDev, devBuf);
        // Check the buffer returned by this call; comparing devBuf with hostBuf again would not test it.
        if constexpr(accIsHostDev)
            CHECK(alpaka::getPtrNative(dstHostBuf) == alpaka::getPtrNative(devBuf));
        INFO("dstHostBuf after copying back");
        alpaka::test::iotaCheckView(queue, dstHostBuf);
    }
}

0 comments on commit 745c3c5

Please sign in to comment.