diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d99f98bae..27aafa5af 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -64,6 +64,7 @@ add_vexcl_test(reduce_by_key reduce_by_key.cpp) add_vexcl_test(logical logical.cpp) add_vexcl_test(threads threads.cpp) add_vexcl_test(svm svm.cpp) +add_vexcl_test(events events.cpp) add_vexcl_test(multiple_objects "dummy1.cpp;dummy2.cpp") if (NOT DEFINED ENV{APPVEYOR}) diff --git a/tests/events.cpp b/tests/events.cpp new file mode 100644 index 000000000..a3d2cb6a9 --- /dev/null +++ b/tests/events.cpp @@ -0,0 +1,37 @@ +#define BOOST_TEST_MODULE Let +#include +#include +#include +#include +#include "context_setup.hpp" + +BOOST_AUTO_TEST_CASE(let_vector_expr) +{ + const size_t n = 16 * 1024; + + std::vector q1(1, ctx.queue(0)); + std::vector q2(1, vex::backend::duplicate_queue(ctx.queue(0))); + + vex::vector x(q1, n); + vex::vector y(q2, n); + + vex::Reductor count(q2); + + x = 1; + q1[0].finish(); + + auto e = vex::let(x) = 2; /* e receives the wait list returned by the assignment to x (x lives on q1) */ + let(y, e) = x; /* y lives on q2; its assignment kernel is enqueued with e as the wait list */ + + BOOST_CHECK_EQUAL(count(y != 2), 0); + + q1[0].finish(); + + x = 3; e[0] = vex::backend::enqueue_marker(q1[0]); /* replace the first event with a marker enqueued on q1 after x = 3 */ + let(y, e) = x; + + BOOST_CHECK_EQUAL(count(y != 3), 0); +} + +BOOST_AUTO_TEST_SUITE_END() + diff --git a/vexcl/backend/compute/event.hpp b/vexcl/backend/compute/event.hpp new file mode 100644 index 000000000..4427964cb --- /dev/null +++ b/vexcl/backend/compute/event.hpp @@ -0,0 +1,61 @@ +#ifndef VEXCL_BACKEND_COMPUTE_EVENT_HPP +#define VEXCL_BACKEND_COMPUTE_EVENT_HPP + +/* +The MIT License + +Copyright (c) 2012-2016 Denis Demidov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following 
conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * \file vexcl/backend/compute/event.hpp + * \author Denis Demidov + * \brief Bring Boost.Compute events into vex::backend::compute namespace. + */ + +#include +#include + +namespace vex { +namespace backend { +namespace compute { + +using boost::compute::event; +using boost::compute::wait_list; + +inline void wait_list_append(wait_list &dst, const event &e) { /* add a single event to dst */ + dst.insert(e); +} + +inline void wait_list_append(wait_list &dst, const wait_list &src) { /* add every event of src to dst */ + for(size_t i = 0; i < src.size(); ++i) + dst.insert(src[i]); +} + +inline boost::compute::event enqueue_marker(boost::compute::command_queue q) { /* queue handle is taken by value, as kernel::operator() does, because command_queue::enqueue_marker() is a non-const member */ + return q.enqueue_marker(); +} + +} // namespace compute +} // namespace backend +} // namespace vex + +#endif diff --git a/vexcl/backend/compute/kernel.hpp b/vexcl/backend/compute/kernel.hpp index ab9eaa163..b3ceaca2b 100644 --- a/vexcl/backend/compute/kernel.hpp +++ b/vexcl/backend/compute/kernel.hpp @@ -36,6 +36,7 @@ THE SOFTWARE. #include #include #include +#include namespace vex { namespace backend { @@ -100,17 +101,37 @@ class kernel { } /// Enqueue the kernel to the specified command queue. 
- void operator()(boost::compute::command_queue q) { - q.enqueue_nd_range_kernel(K, 3, NULL, g_size.dim, w_size.dim); + boost::compute::event operator()(boost::compute::command_queue q) { argpos = 0; + return q.enqueue_nd_range_kernel(K, 3, NULL, g_size.dim, w_size.dim); + } + + /// Enqueue the kernel to the specified command queue, provide wait list. + boost::compute::event operator()( + boost::compute::command_queue q, + const boost::compute::wait_list &events) + { + argpos = 0; + return q.enqueue_nd_range_kernel(K, 3, NULL, g_size.dim, w_size.dim, events); } #ifndef BOOST_NO_VARIADIC_TEMPLATES /// Enqueue the kernel to the specified command queue with the given arguments template - void operator()(boost::compute::command_queue q, Args&&... args) { - K.set_args(std::forward(args)...); - (*this)(q); + boost::compute::event operator()(boost::compute::command_queue q, const Args&... args) { + K.set_args(args...); + return (*this)(q); + } + + /// Enqueue the kernel to the specified command queue with the given wait list and arguments + template + boost::compute::event operator()( + boost::compute::command_queue q, + const boost::compute::wait_list &events, + const Args&... args) + { + K.set_args(args...); + return (*this)(q, events); } #endif diff --git a/vexcl/backend/cuda/context.hpp b/vexcl/backend/cuda/context.hpp index 1a5d07789..b1a3ece8d 100644 --- a/vexcl/backend/cuda/context.hpp +++ b/vexcl/backend/cuda/context.hpp @@ -101,6 +101,9 @@ class device { /// Returns raw CUdevice handle. CUdevice raw() const { return d; } + /// Implicit conversion to the raw CUdevice handle (same value as raw()). + operator CUdevice() const { return d; } + /// Returns name of the device. std::string name() const { char name[256]; @@ -166,6 +169,11 @@ class context { return c.get(); } + /// Implicit conversion to the raw CUcontext handle. + operator CUcontext() const { + return c.get(); + } + /// Binds the context to the calling CPU thread. 
void set_current() const { cuda_check( cuCtxSetCurrent( c.get() ) ); @@ -223,6 +231,12 @@ class command_queue { CUstream raw() const { return s.get(); } + + /// Returns raw CUstream handle for the command queue. + operator CUstream() const { + return s.get(); + } + private: vex::backend::context ctx; vex::backend::device dev; diff --git a/vexcl/backend/cuda/event.hpp b/vexcl/backend/cuda/event.hpp new file mode 100644 index 000000000..8463ce1a6 --- /dev/null +++ b/vexcl/backend/cuda/event.hpp @@ -0,0 +1,97 @@ +#ifndef VEXCL_BACKEND_CUDA_EVENT_HPP +#define VEXCL_BACKEND_CUDA_EVENT_HPP + +/* +The MIT License + +Copyright (c) 2012-2016 Denis Demidov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * \file vexcl/backend/cuda/event.hpp + * \author Denis Demidov + * \brief CUDA event wrapper and wait list for vex::backend::cuda namespace. 
+ */ + +#include + +namespace vex { +namespace backend { +namespace cuda { + +namespace detail { + +template <> +struct deleter_impl { + static void dispose(CUevent e) { + cuda_check( cuEventDestroy(e) ); + } +}; + +} // namespace detail + +class event { /* CUDA event created and recorded on the given queue's stream at construction; copies share the same handle */ + public: + event(const command_queue &q) + : q(q), e( create(q), detail::deleter(q.context().raw()) ) { } + + CUevent raw() const { return e.get(); } + + operator CUevent() const { return e.get(); } + + void wait() const { + cuda_check( cuStreamWaitEvent(q.raw(), e.get(), 0) ); + } + private: + command_queue q; + std::shared_ptr::type> e; + + static CUevent create(const command_queue &q) { + CUevent e; + q.context().set_current(); + + cuda_check( cuEventCreate(&e, CU_EVENT_DEFAULT) ); + cuda_check( cuEventRecord(e, q.raw()) ); + + return e; + } +}; + +typedef std::vector wait_list; + +inline void wait_list_append(wait_list &dst, const event &e) { + dst.push_back(e); +} + +inline void wait_list_append(wait_list &dst, const wait_list &src) { + dst.insert(dst.end(), src.begin(), src.end()); /* append at the back: events already in dst stay in front and are not shifted */ +} + +inline event enqueue_marker(const command_queue &q) { + return event(q); +} + +} // namespace cuda +} // namespace backend +} // namespace vex + + +#endif diff --git a/vexcl/backend/cuda/kernel.hpp b/vexcl/backend/cuda/kernel.hpp index 6fab66d6f..3367a166e 100644 --- a/vexcl/backend/cuda/kernel.hpp +++ b/vexcl/backend/cuda/kernel.hpp @@ -36,6 +36,7 @@ THE SOFTWARE. #include #include +#include namespace vex { namespace backend { @@ -98,11 +99,14 @@ class kernel { } /// Enqueue the kernel to the specified command queue. 
- void operator()(const command_queue &q) { + event operator()(const command_queue &q, const wait_list &events) { prm_addr.clear(); for(auto p = prm_pos.begin(); p != prm_pos.end(); ++p) prm_addr.push_back(stack.data() + *p); + for(auto e = events.begin(); e != events.end(); ++e) + cuda_check( cuEventSynchronize(e->raw()) ); /* NOTE(review): cuEventSynchronize is a host-side wait, so this call blocks until every listed event completes — confirm this is intended rather than a stream-side cuStreamWaitEvent */ + cuda_check( cuLaunchKernel( K, @@ -117,15 +121,32 @@ class kernel { stack.clear(); prm_pos.clear(); + + return event(q); + } + + /// Enqueue the kernel to the specified command queue with an empty wait list. + event operator()(const command_queue &q) { + return (*this)(q, wait_list()); } #ifndef BOOST_NO_VARIADIC_TEMPLATES /// Enqueue the kernel to the specified command queue with the given arguments template - void operator()(const command_queue &q, Arg1 &&arg1, OtherArgs&&... other_args) { - push_arg(std::forward(arg1)); + event operator()(const command_queue &q, + const Arg1 &arg1, const OtherArgs&... other_args) + { + push_arg(arg1); + return (*this)(q, other_args...); + } - (*this)(q, std::forward(other_args)...); + /// Enqueue the kernel to the specified command queue with the given wait list and arguments + template + event operator()(const command_queue &q, const wait_list &events, + const Arg1 &arg1, const OtherArgs&... 
other_args) + { + push_arg(arg1); + return (*this)(q, events, other_args...); } #endif diff --git a/vexcl/backend/opencl/event.hpp b/vexcl/backend/opencl/event.hpp new file mode 100644 index 000000000..375c8c21d --- /dev/null +++ b/vexcl/backend/opencl/event.hpp @@ -0,0 +1,64 @@ +#ifndef VEXCL_BACKEND_OPENCL_EVENT_HPP +#define VEXCL_BACKEND_OPENCL_EVENT_HPP + +/* +The MIT License + +Copyright (c) 2012-2016 Denis Demidov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * \file vexcl/backend/opencl/event.hpp + * \author Denis Demidov + * \brief Bring OpenCL events into vex::backend::opencl namespace. 
+ */ + +#include + +#include +#include + +namespace vex { +namespace backend { +namespace opencl { + +typedef cl::Event event; +typedef std::vector wait_list; + +inline void wait_list_append(wait_list &dst, const event &e) { + dst.push_back(e); +} + +inline void wait_list_append(wait_list &dst, const wait_list &src) { + dst.insert(dst.end(), src.begin(), src.end()); /* append at the back: events already in dst stay in front and are not shifted */ +} + +inline cl::Event enqueue_marker(cl::CommandQueue &q) { + cl::Event e; + q.enqueueMarkerWithWaitList(NULL, &e); + return e; +} + +} // namespace opencl +} // namespace backend +} // namespace vex + +#endif diff --git a/vexcl/backend/opencl/kernel.hpp b/vexcl/backend/opencl/kernel.hpp index 9c91aea9d..f58dec565 100644 --- a/vexcl/backend/opencl/kernel.hpp +++ b/vexcl/backend/opencl/kernel.hpp @@ -37,6 +37,7 @@ THE SOFTWARE. #include #include +#include namespace vex { namespace backend { @@ -104,18 +105,43 @@ class kernel { } /// Enqueue the kernel to the specified command queue. - void operator()(const cl::CommandQueue &q) { - q.enqueueNDRangeKernel(K, cl::NullRange, g_size, w_size); + cl::Event operator()(const cl::CommandQueue &q) { argpos = 0; + cl::Event e; + q.enqueueNDRangeKernel(K, cl::NullRange, g_size, w_size, NULL, &e); + return e; } + /// Enqueue the kernel to the specified command queue, provide wait list. + cl::Event operator()( + const cl::CommandQueue &q, + const std::vector &events + ) + { + argpos = 0; + cl::Event e; + q.enqueueNDRangeKernel(K, cl::NullRange, g_size, w_size, &events, &e); + return e; + } #ifndef BOOST_NO_VARIADIC_TEMPLATES /// Enqueue the kernel to the specified command queue with the given arguments template - void operator()(const cl::CommandQueue &q, Arg1 &&arg1, OtherArgs&&... other_args) { - push_arg(std::forward(arg1)); + cl::Event operator()(const cl::CommandQueue &q, const Arg1 &arg1, const OtherArgs&... 
other_args) { + push_arg(arg1); + + return (*this)(q, other_args...); + } + + /// Enqueue the kernel to the specified command queue with the given wait list and arguments + template + cl::Event operator()( + const cl::CommandQueue &q, + const std::vector &events, + const Arg1 &arg1, const OtherArgs&... other_args) + { + push_arg(arg1); - (*this)(q, std::forward(other_args)...); + return (*this)(q, events, other_args...); } #endif diff --git a/vexcl/let.hpp b/vexcl/let.hpp new file mode 100644 index 000000000..d215573dc --- /dev/null +++ b/vexcl/let.hpp @@ -0,0 +1,86 @@ +#ifndef VEXCL_LET_HPP +#define VEXCL_LET_HPP + +/* +The MIT License + +Copyright (c) 2012-2016 Denis Demidov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * \file vexcl/let.hpp + * \author Denis Demidov + * \brief Wrapper around expression assignment exposing OpenCL/CUDA events. 
+ */ + +#include + +namespace vex { + +template +struct let_vector_impl { + LHS &lhs; + backend::wait_list events; /* owned copy: let() may be called with a defaulted temporary wait list, so a stored reference would dangle once the proxy outlives that full expression */ + + let_vector_impl(LHS &lhs, const backend::wait_list &events) + : lhs(lhs), events(events) {} + +#ifdef DOXYGEN +#define VEXCL_ASSIGNMENT(op, op_type) \ + /** Expression assignment operator. */ \ + template backend::wait_list operator op(const RHS &rhs); +#else +#define VEXCL_ASSIGNMENT(op, op_type) \ + template \ + auto operator op(const RHS &rhs) -> \ + typename std::enable_if< \ + boost::proto::matches< \ + typename boost::proto::result_of::as_expr::type, \ + vector_expr_grammar>::value, \ + backend::wait_list>::type \ + { \ + return detail::assign_expression(lhs, rhs, events); \ + } +#endif + + VEXCL_ASSIGNMENTS(VEXCL_ASSIGNMENT) + +#undef VEXCL_ASSIGNMENT +}; + +/// Assignment operation proxy. +/** + * Exposes OpenCL/CUDA event system. + */ +template +auto let(LHS &lhs, const backend::wait_list &events = backend::wait_list()) -> + typename std::enable_if< + boost::proto::matches< + typename boost::proto::result_of::as_expr::type, + vector_expr_grammar>::value, + let_vector_impl >::type +{ + return let_vector_impl(lhs, events); +} + +} // namespace vex + +#endif diff --git a/vexcl/operations.hpp b/vexcl/operations.hpp index b30ee2069..92570a6d7 100644 --- a/vexcl/operations.hpp +++ b/vexcl/operations.hpp @@ -1812,9 +1812,10 @@ struct return_type { // Assign expression to lhs //--------------------------------------------------------------------------- template -void assign_expression(LHS &lhs, const RHS &rhs, +backend::wait_list assign_expression(LHS &lhs, const RHS &rhs, const std::vector &queue, - const std::vector &part + const std::vector &part, + const backend::wait_list &events = backend::wait_list() ) { #if (VEXCL_CHECK_SIZES > 0) @@ -1836,6 +1837,8 @@ void assign_expression(LHS &lhs, const RHS &rhs, #endif static kernel_cache cache; + backend::wait_list ev; + for(unsigned d = 0; d < queue.size(); d++) { auto kernel = 
cache.find(queue[d]); @@ -1889,13 +1892,17 @@ void assign_expression(LHS &lhs, const RHS &rhs, extract_terminals()( boost::proto::as_child(lhs), setarg); extract_terminals()( boost::proto::as_child(rhs), setarg); - kernel->second(queue[d]); + backend::wait_list_append(ev, kernel->second(queue[d], events)); } } + + return ev; } template -void assign_expression(LHS &lhs, const RHS &rhs) { +backend::wait_list assign_expression(LHS &lhs, const RHS &rhs, + const backend::wait_list &events = backend::wait_list()) +{ get_expression_properties prop; extract_terminals()(boost::proto::as_child(lhs), prop); @@ -1903,7 +1910,7 @@ void assign_expression(LHS &lhs, const RHS &rhs) { "Can not determine expression size and queue list" ); - assign_expression(lhs, rhs, prop.queue, prop.part); + return assign_expression(lhs, rhs, prop.queue, prop.part, events); } // Static for loop