Skip to content

Commit

Permalink
refs #27: Add size/alignemt checks for datablock_kernel_arg structs.
Browse files Browse the repository at this point in the history
 * It is working as exepcted. ...then what's the problem with CUDA? :(
  • Loading branch information
achimnol committed Feb 23, 2016
1 parent e9e8770 commit c52f5a3
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 17 deletions.
7 changes: 4 additions & 3 deletions include/nba/engines/cuda/test.hh
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
namespace nba {
extern void* get_test_kernel_noop();
extern void* get_test_kernel_shiftedint_size_check();
extern void* get_test_kernel_shiftedint_value_check();
extern void *get_test_kernel_noop();
extern void *get_test_kernel_shiftedint_size_check();
extern void *get_test_kernel_shiftedint_value_check();
extern void *get_test_kernel_dbarg_size_check();
}

// vim: ts=8 sts=4 sw=4 et
8 changes: 4 additions & 4 deletions include/nba/framework/datablock_shared.hh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <cstdint>
#include <nba/core/shiftedint.hh>

struct datablock_batch_info {
struct alignas(8) datablock_batch_info {
void *buffer_bases_in;
void *buffer_bases_out;
uint32_t item_count_in;
Expand All @@ -18,14 +18,14 @@ struct datablock_batch_info {
uint16_t *item_sizes_out;
nba::dev_offset_t *item_offsets_in;
nba::dev_offset_t *item_offsets_out;
}; // __cuda_aligned
};

struct datablock_kernel_arg {
struct alignas(8) datablock_kernel_arg {
uint32_t total_item_count_in;
uint32_t total_item_count_out;
uint16_t item_size_in; // for fixed-size cases
uint16_t item_size_out; // for fixed-size cases
struct datablock_batch_info batches[0];
}; // __cuda_aligned
};

#endif
12 changes: 12 additions & 0 deletions src/engines/cuda/test.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <nba/engines/cuda/test.hh>
#include <nba/core/shiftedint.hh>
#include <nba/framework/datablock_shared.hh>

using namespace std;
using namespace nba;
Expand All @@ -23,10 +24,21 @@ __global__ void shiftedint_value_check
*raw_v = v->as_value<uint64_t>();
}

__global__ void dbarg_size_check(size_t *sizes, size_t *offsets)
{
sizes[0] = sizeof(struct datablock_kernel_arg);
offsets[0] = offsetof(struct datablock_kernel_arg, batches);
sizes[1] = sizeof(struct datablock_batch_info);
offsets[1] = offsetof(struct datablock_batch_info, item_offsets_in);
}

void *nba::get_test_kernel_shiftedint_size_check()
{ return reinterpret_cast<void *> (shiftedint_size_check); }

void *nba::get_test_kernel_shiftedint_value_check()
{ return reinterpret_cast<void *> (shiftedint_value_check); }

void *nba::get_test_kernel_dbarg_size_check()
{ return reinterpret_cast<void *> (dbarg_size_check); }

// vim: ts=8 sts=4 sw=4 et
44 changes: 34 additions & 10 deletions tests/test_cuda.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ TEST(CUDAStructTest, ShfitedIntSizeCheck) {
ASSERT_NE(nullptr, output_d);
size_t output_h = 0;
void *raw_args[1] = { &output_d };
EXPECT_EQ(cudaSuccess, cudaLaunchKernel(k, dim3(1), dim3(1), raw_args, 0, 0));
EXPECT_EQ(cudaSuccess, cudaMemcpy(&output_h, output_d, sizeof(size_t), cudaMemcpyDeviceToHost));
ASSERT_EQ(cudaSuccess, cudaLaunchKernel(k, dim3(1), dim3(1), raw_args, 0, 0));
ASSERT_EQ(cudaSuccess, cudaMemcpy(&output_h, output_d, sizeof(size_t), cudaMemcpyDeviceToHost));
EXPECT_EQ(sizeof(nba::dev_offset_t), 2);
EXPECT_EQ(sizeof(nba::dev_offset_t), output_h);
EXPECT_EQ(cudaSuccess, cudaFree(output_d));
EXPECT_EQ(cudaSuccess, cudaDeviceSynchronize());
ASSERT_EQ(cudaSuccess, cudaFree(output_d));
ASSERT_EQ(cudaSuccess, cudaDeviceSynchronize());
}

TEST(CUDAStructTest, ShfitedIntValueCheck) {
Expand All @@ -64,15 +64,39 @@ TEST(CUDAStructTest, ShfitedIntValueCheck) {
EXPECT_EQ(165320, input_h.as_value<uint64_t>());
size_t output_h = 0;
void *raw_args[2] = { &input_d, &output_d };
EXPECT_EQ(cudaSuccess, cudaMemcpy(input_d, &input_h, sizeof(nba::dev_offset_t), cudaMemcpyHostToDevice));
EXPECT_EQ(cudaSuccess, cudaLaunchKernel(k, dim3(1), dim3(1), raw_args, 0, 0));
EXPECT_EQ(cudaSuccess, cudaMemcpy(&output_h, output_d, sizeof(uint64_t), cudaMemcpyDeviceToHost));
ASSERT_EQ(cudaSuccess, cudaMemcpy(input_d, &input_h, sizeof(nba::dev_offset_t), cudaMemcpyHostToDevice));
ASSERT_EQ(cudaSuccess, cudaLaunchKernel(k, dim3(1), dim3(1), raw_args, 0, 0));
ASSERT_EQ(cudaSuccess, cudaMemcpy(&output_h, output_d, sizeof(uint64_t), cudaMemcpyDeviceToHost));
EXPECT_EQ(165320, output_h);
EXPECT_EQ(cudaSuccess, cudaFree(input_d));
EXPECT_EQ(cudaSuccess, cudaFree(output_d));
EXPECT_EQ(cudaSuccess, cudaDeviceSynchronize());
ASSERT_EQ(cudaSuccess, cudaFree(input_d));
ASSERT_EQ(cudaSuccess, cudaFree(output_d));
ASSERT_EQ(cudaSuccess, cudaDeviceSynchronize());
}

TEST(CUDAStructTest, DatablockArgSizeAlignCheck) {
void *k = get_test_kernel_dbarg_size_check();
void *output_sizes_d;
void *output_offsets_d;
ASSERT_EQ(cudaSuccess, cudaMalloc(&output_sizes_d, sizeof(size_t) * 2));
ASSERT_NE(nullptr, output_sizes_d);
ASSERT_EQ(cudaSuccess, cudaMalloc(&output_offsets_d, sizeof(size_t) * 2));
ASSERT_NE(nullptr, output_offsets_d);
size_t output_sizes_h[2] = { 0, 0 };
size_t output_offsets_h[2] = { 0, 0 };
void *raw_args[2] = { &output_sizes_d, &output_offsets_d };
ASSERT_EQ(cudaSuccess, cudaLaunchKernel(k, dim3(1), dim3(1), raw_args, 0, 0));
ASSERT_EQ(cudaSuccess, cudaMemcpy(&output_sizes_h, output_sizes_d, sizeof(size_t) * 2, cudaMemcpyDeviceToHost));
ASSERT_EQ(cudaSuccess, cudaMemcpy(&output_offsets_h, output_offsets_d, sizeof(size_t) * 2, cudaMemcpyDeviceToHost));
EXPECT_EQ(sizeof(struct datablock_kernel_arg), output_sizes_h[0]);
EXPECT_EQ(offsetof(struct datablock_kernel_arg, batches), output_offsets_h[0]);
EXPECT_EQ(sizeof(struct datablock_batch_info), output_sizes_h[1]);
EXPECT_EQ(offsetof(struct datablock_batch_info, item_offsets_in), output_offsets_h[1]);
ASSERT_EQ(cudaSuccess, cudaFree(output_sizes_d));
ASSERT_EQ(cudaSuccess, cudaFree(output_offsets_d));
ASSERT_EQ(cudaSuccess, cudaDeviceSynchronize());
}


#else

TEST(CUDATest, Noop) {
Expand Down

0 comments on commit c52f5a3

Please sign in to comment.