diff --git a/llama.cpp b/llama.cpp index ee6ec0920fc9c..a491f1c7e15c4 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1291,12 +1291,6 @@ static bool llama_eval_internal( LLAMA_ASSERT((!tokens && embd) || (tokens && !embd)); - // enforce that the first token is BOS - if (tokens && n_past == 0 && tokens[0] != llama_token_bos()) { - fprintf(stderr, "%s: first token must be BOS\n", __func__); - return false; - } - const int64_t t_start_us = ggml_time_us(); const int N = n_tokens;