
Renamed reduced_windows to shrink_windows.
pandrey-fr committed Jun 10, 2021
1 parent 9bdb5a1 commit 77698cd
Showing 3 changed files with 17 additions and 17 deletions.
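For context, `shrink_windows` is the keyword users pass to the `Word2Vec` constructor, where it replaces the previous `reduced_windows` name. A minimal usage sketch (the toy corpus and the `vector_size`/`min_count` values are illustrative, not part of this commit):

```python
from gensim.models import Word2Vec

# Toy corpus, for illustration only.
sentences = [["the", "quick", "brown", "fox"], ["jumps", "over", "the", "lazy", "dog"]]

# shrink_windows=True (the default, formerly `reduced_windows`) samples the
# effective window size per target word; False always uses the full `window`.
model = Word2Vec(sentences, vector_size=32, window=5, min_count=1, shrink_windows=True)
```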
18 changes: 9 additions & 9 deletions gensim/models/word2vec.py
@@ -240,7 +240,7 @@ def __init__(
max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
- comment=None, max_final_vocab=None, reduced_windows=True,
+ comment=None, max_final_vocab=None, shrink_windows=True,
):
"""Train, use and evaluate neural networks described in https://code.google.com/p/word2vec/.
@@ -345,7 +345,7 @@ def __init__(
:meth:`~gensim.models.word2vec.Word2Vec.get_latest_training_loss`.
callbacks : iterable of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional
Sequence of callbacks to be executed at specific stages during training.
- reduced_windows : bool, optional
+ shrink_windows : bool, optional
If True, the effective window size is uniformly sampled from [1, `window`]
for each target word during training, to match the original word2vec algorithm's
approximate weighting of context words by distance. Otherwise, the effective
window size is always fixed to `window` words to either side.
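The sampling rule this docstring describes can be seen in miniature: the training code further down draws an offset in [0, `window`) for each target word and uses `window - offset` context words on each side, which makes the effective window uniform over [1, `window`]. A NumPy sketch of both settings (mirroring the `self.random = np.random.RandomState(seed)` state the model keeps; array sizes are illustrative):

```python
import numpy as np

window = 5
rng = np.random.RandomState(1)  # stands in for the model's self.random

# shrink_windows=True: one offset per target word, drawn from [0, window).
offsets = rng.randint(0, window, size=8)
effective = window - offsets  # uniform over {1, ..., window}

# shrink_windows=False: offsets are all zero, so the full window is used.
effective_fixed = window - np.zeros(8, dtype=int)  # always == window
```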
@@ -382,7 +382,7 @@ def __init__(
self.min_alpha = float(min_alpha)

self.window = int(window)
- self.reduced_windows = bool(reduced_windows)
+ self.shrink_windows = bool(shrink_windows)
self.random = np.random.RandomState(seed)

self.hs = int(hs)
@@ -426,7 +426,7 @@ def __init__(
corpus_iterable=corpus_iterable, corpus_file=corpus_file, total_examples=self.corpus_count,
total_words=self.corpus_total_words, epochs=self.epochs, start_alpha=self.alpha,
end_alpha=self.min_alpha, compute_loss=self.compute_loss, callbacks=callbacks,
- reduced_windows=self.reduced_windows)
+ shrink_windows=self.shrink_windows)
else:
if trim_rule is not None:
logger.warning(
@@ -969,7 +969,7 @@ def train(
self, corpus_iterable=None, corpus_file=None, total_examples=None,
total_words=None, epochs=None, start_alpha=None, end_alpha=None,
word_count=0, queue_factor=2, report_delay=1.0, compute_loss=False,
- reduced_windows=None, callbacks=(), **kwargs,
+ shrink_windows=None, callbacks=(), **kwargs,
):
"""Update the model's neural weights from a sequence of sentences.
@@ -1026,7 +1026,7 @@ def train(
compute_loss: bool, optional
If True, computes and stores loss value which can be retrieved using
:meth:`~gensim.models.word2vec.Word2Vec.get_latest_training_loss`.
- reduced_windows : bool, optional
+ shrink_windows : bool, optional
If True, the effective window size is uniformly sampled from [1, `window`]
for each target word during training, to match the original word2vec algorithm's
approximate weighting of context words by distance. Otherwise, the effective
window size is always fixed to `window` words to either side.
@@ -1050,8 +1050,8 @@ def train(
self.alpha = start_alpha or self.alpha
self.min_alpha = end_alpha or self.min_alpha
self.epochs = epochs
- if reduced_windows is not None:
-     self.reduced_windows = bool(reduced_windows)
+ if shrink_windows is not None:
+     self.shrink_windows = bool(shrink_windows)

self._check_training_sanity(epochs=epochs, total_examples=total_examples, total_words=total_words)
self._check_corpus_sanity(corpus_iterable=corpus_iterable, corpus_file=corpus_file, passes=epochs)
@@ -1061,7 +1061,7 @@ def train(
msg=(
f"training model with {self.workers} workers on {len(self.wv)} vocabulary and "
f"{self.layer1_size} features, using sg={self.sg} hs={self.hs} sample={self.sample} "
f"negative={self.negative} window={self.window} reduced_windows={self.reduced_windows}"
f"negative={self.negative} window={self.window} shrink_windows={self.shrink_windows}"
),
)

12 changes: 6 additions & 6 deletions gensim/models/word2vec_corpusfile.pyx
@@ -187,7 +187,7 @@ cdef void prepare_c_structures_for_batch(
int *effective_words, int *effective_sentences, unsigned long long *next_random,
cvocab_t *vocab, int *sentence_idx, np.uint32_t *indexes, int *codelens,
np.uint8_t **codes, np.uint32_t **points, np.uint32_t *reduced_windows,
- int do_reduced_windows) nogil:
+ int shrink_windows) nogil:
cdef VocabItem word
cdef string token
cdef vector[string] sent
@@ -226,7 +226,7 @@

# precompute "reduced window" offsets in a single randint() call
for i in range(effective_words[0]):
- if do_reduced_windows:
+ if shrink_windows:
reduced_windows[i] = random_int32(next_random) % window
else:
reduced_windows[i] = 0
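Worth noting: this corpus-file path draws each offset from the C-level RNG (`random_int32(next_random) % window`), while the batch path in `word2vec_inner.pyx` further down precomputes all offsets in one NumPy `randint` call. Up to the modulo bias of the C path, both yield effectively the same offset distribution; a simulated comparison (the NumPy stand-in for `random_int32` is an assumption, not gensim code):

```python
import numpy as np

window = 5
rng = np.random.RandomState(7)

# Stand-in for `random_int32(next_random) % window`: a 32-bit draw, then modulo.
c_style = rng.randint(0, 2**32, size=8, dtype=np.int64) % window

# The word2vec_inner.pyx equivalent: offsets drawn directly in [0, window).
numpy_style = rng.randint(0, window, size=8)
```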
@@ -299,7 +299,7 @@ def train_epoch_sg(model, corpus_file, offset, _cython_vocab, _cur_epoch, _expec
cdef long long total_sentences = 0
cdef long long total_effective_words = 0, total_words = 0
cdef int sent_idx, idx_start, idx_end
- cdef int do_reduced_windows = int(model.reduced_windows)
+ cdef int shrink_windows = int(model.shrink_windows)

init_w2v_config(&c, model, _alpha, compute_loss, _work)

@@ -316,7 +316,7 @@
prepare_c_structures_for_batch(
sentences, c.sample, c.hs, c.window, &total_words, &effective_words, &effective_sentences,
&c.next_random, vocab.get_vocab_ptr(), c.sentence_idx, c.indexes,
- c.codelens, c.codes, c.points, c.reduced_windows, do_reduced_windows)
+ c.codelens, c.codes, c.points, c.reduced_windows, shrink_windows)

for sent_idx in range(effective_sentences):
idx_start = c.sentence_idx[sent_idx]
@@ -400,7 +400,7 @@ def train_epoch_cbow(model, corpus_file, offset, _cython_vocab, _cur_epoch, _exp
cdef long long total_sentences = 0
cdef long long total_effective_words = 0, total_words = 0
cdef int sent_idx, idx_start, idx_end
- cdef int do_reduced_windows = int(model.reduced_windows)
+ cdef int shrink_windows = int(model.shrink_windows)

init_w2v_config(&c, model, _alpha, compute_loss, _work, _neu1)

@@ -417,7 +417,7 @@
prepare_c_structures_for_batch(
sentences, c.sample, c.hs, c.window, &total_words, &effective_words,
&effective_sentences, &c.next_random, vocab.get_vocab_ptr(), c.sentence_idx,
- c.indexes, c.codelens, c.codes, c.points, c.reduced_windows, do_reduced_windows)
+ c.indexes, c.codelens, c.codes, c.points, c.reduced_windows, shrink_windows)

for sent_idx in range(effective_sentences):
idx_start = c.sentence_idx[sent_idx]
4 changes: 2 additions & 2 deletions gensim/models/word2vec_inner.pyx
@@ -570,7 +570,7 @@ def train_batch_sg(model, sentences, alpha, _work, compute_loss):
break # TODO: log warning, tally overflow?

# precompute "reduced window" offsets in a single randint() call
- if model.reduced_windows:
+ if model.shrink_windows:
for i, item in enumerate(model.random.randint(0, c.window, effective_words)):
c.reduced_windows[i] = item
else:
@@ -669,7 +669,7 @@ def train_batch_cbow(model, sentences, alpha, _work, _neu1, compute_loss):
break # TODO: log warning, tally overflow?

# precompute "reduced window" offsets in a single randint() call
- if model.reduced_windows:
+ if model.shrink_windows:
for i, item in enumerate(model.random.randint(0, c.window, effective_words)):
c.reduced_windows[i] = item
else:
