From 18b00e6ca0544f61efcfc4f934927e088371e960 Mon Sep 17 00:00:00 2001
From: Michael Mi <guocuimi@gmail.com>
Date: Mon, 1 Jul 2024 00:49:08 -0700
Subject: [PATCH] bugfix: check against num_tokens instead of num_prompt_tokens
 for shared blocks

---
 src/common/slice.h       | 4 ++--
 src/request/sequence.cpp | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/common/slice.h b/src/common/slice.h
index 754f4c4c..967021f7 100644
--- a/src/common/slice.h
+++ b/src/common/slice.h
@@ -18,7 +18,7 @@ class Slice final {
 
   Slice(const std::vector<T>& data, size_t size)
       : data_(data.data()), size_(size) {
-    CHECK(size <= data.size());
+    CHECK_LE(size, data.size());
   }
 
   // iterator for the slice
@@ -43,7 +43,7 @@ class Slice final {
 
   // get a sub slice
   Slice<T> slice(size_t start) const {
-    CHECK(start <= size_);
+    CHECK_LE(start, size_);
     return {data_ + start, size_ - start};
   }
 
diff --git a/src/request/sequence.cpp b/src/request/sequence.cpp
index 973df853..2b4e0162 100644
--- a/src/request/sequence.cpp
+++ b/src/request/sequence.cpp
@@ -302,16 +302,16 @@ void Sequence::set_shared_blocks(std::vector<Block>&& shared_blocks) {
   // update the kv cache position
   size_t num_shared_tokens = blocks_.size() * blocks_[0].size();
 
-  // It is possible that num_shared_tokens == num_prompt_tokens_, indicating
+  // It is possible that num_shared_tokens == num_tokens_, indicating
   // that the exact same prompt has been received again. In this case, it
   // becomes necessary to adjust the kv cache position to the previous token,
   // allowing the model proceed. While the shared blocks should be immutable
   // ideally, but it remains safe to regenerate the kv cache in this context,
   // given the utiliztion of the exact same token.
-  if (num_shared_tokens == num_prompt_tokens_) {
+  if (num_shared_tokens == num_tokens_) {
     num_shared_tokens -= 1;
   }
-  CHECK(num_shared_tokens < num_prompt_tokens_);
+  CHECK_LT(num_shared_tokens, num_tokens_);
   // update the kv cache position
   std::fill(num_kv_cache_tokens_.begin(),
             num_kv_cache_tokens_.end(),