From 18b00e6ca0544f61efcfc4f934927e088371e960 Mon Sep 17 00:00:00 2001 From: Michael Mi Date: Mon, 1 Jul 2024 00:49:08 -0700 Subject: [PATCH] bugfix: check against num_tokens instead of num_prompt_tokens for shared blocks --- src/common/slice.h | 4 ++-- src/request/sequence.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/common/slice.h b/src/common/slice.h index 754f4c4c..967021f7 100644 --- a/src/common/slice.h +++ b/src/common/slice.h @@ -18,7 +18,7 @@ class Slice final { Slice(const std::vector& data, size_t size) : data_(data.data()), size_(size) { - CHECK(size <= data.size()); + CHECK_LE(size, data.size()); } // iterator for the slice @@ -43,7 +43,7 @@ class Slice final { // get a sub slice Slice slice(size_t start) const { - CHECK(start <= size_); + CHECK_LE(start, size_); return {data_ + start, size_ - start}; } diff --git a/src/request/sequence.cpp b/src/request/sequence.cpp index 973df853..2b4e0162 100644 --- a/src/request/sequence.cpp +++ b/src/request/sequence.cpp @@ -302,16 +302,16 @@ void Sequence::set_shared_blocks(std::vector&& shared_blocks) { // update the kv cache position size_t num_shared_tokens = blocks_.size() * blocks_[0].size(); - // It is possible that num_shared_tokens == num_prompt_tokens_, indicating + // It is possible that num_shared_tokens == num_tokens_, indicating // that the exact same prompt has been received again. In this case, it // becomes necessary to adjust the kv cache position to the previous token, // allowing the model proceed. While the shared blocks should be immutable // ideally, but it remains safe to regenerate the kv cache in this context, // given the utiliztion of the exact same token. - if (num_shared_tokens == num_prompt_tokens_) { + if (num_shared_tokens == num_tokens_) { num_shared_tokens -= 1; } - CHECK(num_shared_tokens < num_prompt_tokens_); + CHECK_LT(num_shared_tokens, num_tokens_); // update the kv cache position std::fill(num_kv_cache_tokens_.begin(), num_kv_cache_tokens_.end(),