diff --git a/rank_bm25.py b/rank_bm25.py index 31a0584..0008e38 100644 --- a/rank_bm25.py +++ b/rank_bm25.py @@ -93,7 +93,7 @@ def _calc_idf(self, nd): # idf can be negative if word is contained in more than half of documents negative_idfs = [] for word, freq in nd.items(): - idf = math.log(self.corpus_size - freq + 0.5) - math.log(freq + 0.5) + idf = math.log(self.corpus_size + 1) - math.log(freq + 0.5) self.idf[word] = idf idf_sum += idf if idf < 0: