Skip to content

Commit

Permalink
bugfix: check against num_tokens instead of num_prompt_tokens for sha…
Browse files Browse the repository at this point in the history
…red blocks
  • Loading branch information
guocuimi committed Jul 1, 2024
1 parent 437be3f commit 18b00e6
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
4 changes: 2 additions & 2 deletions src/common/slice.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class Slice final {

// Builds a slice over the first `size` elements of `data`.
// Only data.data() and `size` are stored; the elements are not copied.
// NOTE(review): presumably `data` must outlive the slice - confirm with callers.
Slice(const std::vector<T>& data, size_t size)
: data_(data.data()), size_(size) {
// The requested size must not exceed the vector's size. The two checks
// below are the before/after forms of the same bound test in this diff.
CHECK(size <= data.size());
CHECK_LE(size, data.size());
}

// iterator for the slice
Expand All @@ -43,7 +43,7 @@ class Slice final {

// get a sub slice that begins `start` elements into this slice and keeps
// the remaining size_ - start elements
Slice<T> slice(size_t start) const {
// `start` may equal size_, which yields a sub slice of size 0. The two
// checks below are the before/after forms of the same bound test in this diff.
CHECK(start <= size_);
CHECK_LE(start, size_);
return {data_ + start, size_ - start};
}

Expand Down
6 changes: 3 additions & 3 deletions src/request/sequence.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,16 +302,16 @@ void Sequence::set_shared_blocks(std::vector<Block>&& shared_blocks) {
// update the kv cache position
size_t num_shared_tokens = blocks_.size() * blocks_[0].size();

// It is possible that num_shared_tokens == num_prompt_tokens_, indicating
// It is possible that num_shared_tokens == num_tokens_, indicating
// that the exact same prompt has been received again. In this case, it
// becomes necessary to adjust the kv cache position to the previous token,
// allowing the model to proceed. While the shared blocks should ideally be
// immutable, it remains safe to regenerate the kv cache in this context,
// given the utilization of the exact same token.
if (num_shared_tokens == num_prompt_tokens_) {
if (num_shared_tokens == num_tokens_) {
num_shared_tokens -= 1;
}
CHECK(num_shared_tokens < num_prompt_tokens_);
CHECK_LT(num_shared_tokens, num_tokens_);
// update the kv cache position
std::fill(num_kv_cache_tokens_.begin(),
num_kv_cache_tokens_.end(),
Expand Down

0 comments on commit 18b00e6

Please sign in to comment.