Resolve some corner cases in spmv
ddemidov committed Oct 14, 2016
1 parent 6187271 commit faf15e4
Showing 2 changed files with 30 additions and 10 deletions.
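The corner cases addressed here appear to be empty per-device partitions: when a multi-device context has more devices than the matrix has rows, or a device's slice of the matrix contains no nonzeros, the code previously constructed zero-sized device buffers and set kernel arguments from matrices that were never built. A minimal sketch of how an empty partition arises under even row partitioning (illustrative only; VexCL's actual partitioner can weight devices by measured performance):

    #include <cstddef>
    #include <vector>

    // Even row partitioning across devices: with more devices than rows,
    // some partitions come out empty -- the situation this commit guards.
    std::vector<std::size_t> partition_rows(std::size_t nrows, std::size_t ndev) {
        std::vector<std::size_t> part(ndev + 1);
        for (std::size_t d = 0; d <= ndev; ++d)
            part[d] = nrows * d / ndev;
        return part; // nrows = 2, ndev = 3 -> {0, 0, 1, 2}: device 0 owns no rows
    }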
13 changes: 10 additions & 3 deletions vexcl/sparse/csr.hpp
@@ -80,6 +80,7 @@ class csr {
         src.new_line()
             << type_name<res_type>() << " " << prm_name << "_sum = "
             << res_type() << ";";
+        src.new_line() << "if (" << prm_name << "_ptr)";
         src.open("{");
         src.new_line() << type_name<Ptr>() << " row_beg = " << prm_name << "_ptr[idx];";
         src.new_line() << type_name<Ptr>() << " row_end = " << prm_name << "_ptr[idx+1];";
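With the added line, the generated kernel skips the row traversal when its pointer argument arrives as null (which the second hunk below arranges for empty partitions). The emitted body now reads roughly as follows; A_ptr, A_col, A_val, and A_sum stand in for the prm_name-derived identifiers, and since the CSR loop itself sits below the fold of this hunk, it is sketched here as the standard traversal:

    double A_sum = 0;                        // "<prm>_sum = res_type()"
    if (A_ptr) {                             // new: null for empty partitions
        size_t row_beg = A_ptr[idx];
        size_t row_end = A_ptr[idx + 1];
        for (size_t j = row_beg; j < row_end; ++j)
            A_sum += A_val[j] * x[A_col[j]];
    }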
@@ -128,9 +129,15 @@ class csr {
             backend::kernel &kernel, unsigned part, size_t index_offset,
             detail::kernel_generator_state_ptr state) const
         {
-            kernel.push_arg(ptr);
-            kernel.push_arg(col);
-            kernel.push_arg(val);
+            if (nnz) {
+                kernel.push_arg(ptr);
+                kernel.push_arg(col);
+                kernel.push_arg(val);
+            } else {
+                kernel.push_arg(static_cast<size_t>(0));
+                kernel.push_arg(static_cast<size_t>(0));
+                kernel.push_arg(static_cast<size_t>(0));
+            }
 
             detail::set_expression_argument x_args(kernel, part, index_offset, state);
             detail::extract_terminals()( boost::proto::as_child(x), x_args);
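The host side mirrors the device-side check: the generated kernel signature is fixed, so an empty partition must still fill all three argument slots. Pushing size_t zeros binds null device pointers, which is exactly what the emitted if (<prm>_ptr) guard from the first hunk tests. Condensed into one place (a sketch, not the literal code):

    // Host: argument count and order must always match the kernel signature.
    if (nnz) {
        kernel.push_arg(ptr);   // real device buffers
        kernel.push_arg(col);
        kernel.push_arg(val);
    } else {
        for (int i = 0; i < 3; ++i)
            kernel.push_arg(static_cast<size_t>(0)); // null placeholders
    }
    // Device: the generated "if (<prm>_ptr)" then skips the CSR loop.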
27 changes: 20 additions & 7 deletions vexcl/sparse/distributed.hpp
@@ -49,6 +49,8 @@ class distributed {
         for(int d = 0; d < static_cast<int>(q.size()); ++d) {
             size_t loc_rows = row_part[d+1] - row_part[d];
 
+            if (!loc_rows) continue;
+
             col_type col_beg = col_part[d];
             col_type col_end = col_part[d+1];
 
@@ -116,9 +118,10 @@ class distributed {
             // Create local and remote parts of the matrix on the current
             // device.
             std::vector<backend::command_queue> qd = {q[d]};
-            A_loc[d] = std::make_shared<Matrix>(
-                    qd, loc_rows, col_end - col_beg,
-                    loc_ptr, loc_col, loc_val, fast_setup);
+            if (loc_nnz)
+                A_loc[d] = std::make_shared<Matrix>(
+                        qd, loc_rows, col_end - col_beg,
+                        loc_ptr, loc_col, loc_val, fast_setup);
 
             if (nrcols)
                 A_rem[d] = std::make_shared<Matrix>(
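Because A_loc[d] is a shared_ptr, skipping the assignment leaves it null for devices whose local part has no nonzeros; the kernel-argument hunk at the end of this file keys off exactly that. A self-contained sketch of the invariant, with Matrix reduced to a stub:

    #include <cassert>
    #include <cstddef>
    #include <memory>

    struct Matrix {};  // stub for the backend matrix type

    // The pointer stays null for empty local parts, so later code can test
    // the pointer instead of re-deriving the nonzero count.
    void build_local(std::shared_ptr<Matrix> &A_loc_d, std::size_t loc_nnz) {
        if (loc_nnz)
            A_loc_d = std::make_shared<Matrix>();
        assert(static_cast<bool>(A_loc_d) == (loc_nnz > 0));
    }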
@@ -169,6 +172,7 @@ class distributed {
         // See what elements of rem_vals each GPU needs to receive:
         for(size_t d = 0; d < q.size(); ++d) {
             size_t nrecv = rcols[d].size();
+            if (!nrecv) continue;
 
             ex[d].vals_to_recv.resize(nrecv);
             ex[d].rem_x = backend::device_vector<rhs_type>(q[d], nrecv);
@@ -199,9 +203,11 @@ class distributed {
 
             size_t nsend = rval_ptr[d+1] - rval_ptr[d];
 
-            ex[d].vals_to_send = backend::device_vector<rhs_type>(q[d], nsend);
-            ex[d].cols_to_send = backend::device_vector<col_type>(q[d], nsend,
-                    &rem_cols[rval_ptr[d]]);
+            if (nsend) {
+                ex[d].vals_to_send = backend::device_vector<rhs_type>(q[d], nsend);
+                ex[d].cols_to_send = backend::device_vector<col_type>(q[d], nsend,
+                        &rem_cols[rval_ptr[d]]);
+            }
         }
 
         rem_vals.resize(rem_cols.size());
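The nsend guard (like the nrecv guard two hunks up) protects the device_vector constructors: zero-sized allocations are illegal in OpenCL, where clCreateBuffer with size == 0 fails with CL_INVALID_BUFFER_SIZE. A minimal raw-OpenCL illustration of the hazard (the VexCL OpenCL backend wraps this call):

    #include <CL/cl.h>

    // Returns nullptr rather than attempting a zero-sized allocation,
    // which clCreateBuffer rejects with CL_INVALID_BUFFER_SIZE.
    cl_mem make_buffer(cl_context ctx, size_t bytes) {
        if (bytes == 0) return nullptr;   // the corner case guarded above
        cl_int err = CL_SUCCESS;
        cl_mem buf = clCreateBuffer(ctx, CL_MEM_READ_WRITE, bytes, nullptr, &err);
        return err == CL_SUCCESS ? buf : nullptr;
    }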
@@ -273,7 +279,14 @@ class distributed {
             backend::kernel &kernel, unsigned part, size_t index_offset,
             detail::kernel_generator_state_ptr state) const
         {
-            A_loc[part]->kernel_arg_setter(x, kernel, part, index_offset, state);
+            if (A_loc[part]) {
+                A_loc[part]->kernel_arg_setter(x, kernel, part, index_offset, state);
+            } else {
+                Matrix dummy_A(q[part]);
+                backend::device_vector<rhs_type> dummy_x;
+                dummy_A.kernel_arg_setter(dummy_x, kernel, part, index_offset, state);
+            }
+
             if (A_rem[part]) {
                 A_rem[part]->kernel_arg_setter(ex[part].rem_x, kernel, part, index_offset, state);
             } else {
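When a device owns no local part there is still a kernel to launch for the rest of the expression, so a default-constructed empty Matrix and an empty device_vector fill the argument slots instead; the truncated else branch for A_rem presumably does the same for the remote part. Taken together, the guards let spmv run even when the matrix is too small to give every device work, as in the hedged usage sketch below (the vex::sparse::matrix alias, header paths, and constructor argument order are inferred from this diff and may differ in detail):

    #include <vexcl/vexcl.hpp>
    #include <vexcl/sparse/distributed.hpp>
    #include <vexcl/sparse/matrix.hpp>

    int main() {
        vex::Context ctx(vex::Filter::Any);  // possibly more devices than rows

        // A 2x2 diagonal matrix in CSR form.
        std::vector<size_t> ptr = {0, 1, 2};
        std::vector<int>    col = {0, 1};
        std::vector<double> val = {1.0, 2.0};

        vex::sparse::distributed<vex::sparse::matrix<double>> A(
                ctx, 2, 2, ptr, col, val);

        vex::vector<double> x(ctx, 2), y(ctx, 2);
        x = 1.0;
        y = A * x;  // previously could fail on devices with empty partitions
    }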
