From e770f6ceea4a73b23af1e61f99f6af64b767da7c Mon Sep 17 00:00:00 2001 From: bsantamaria Date: Fri, 28 Jan 2022 11:08:11 +0100 Subject: [PATCH 1/3] Add parameter encoding to flashtext --- docs/docs/extractors/flashtext.md | 1 + rasa_nlu_examples/extractors/flashtext_entity_extractor.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/docs/extractors/flashtext.md b/docs/docs/extractors/flashtext.md index 24f600e..6d7d461 100644 --- a/docs/docs/extractors/flashtext.md +++ b/docs/docs/extractors/flashtext.md @@ -29,6 +29,7 @@ use the parameter `non_word_boundaries` - **entity_name**: the name of the entity to attach to the message - **case_sensitive**: whether to consider case when matching entities. `False` by default. - **non_word_boundaries**: characters which shouldn't be considered word boundaries. +- **encoding**: the name of the encoding used to read the lookup text file. ## Base Usage diff --git a/rasa_nlu_examples/extractors/flashtext_entity_extractor.py b/rasa_nlu_examples/extractors/flashtext_entity_extractor.py index 59f328d..cad6746 100644 --- a/rasa_nlu_examples/extractors/flashtext_entity_extractor.py +++ b/rasa_nlu_examples/extractors/flashtext_entity_extractor.py @@ -45,6 +45,7 @@ def get_default_config() -> Dict[Text, Any]: "non_word_boundaries": "", "path": None, "entity_name": None, + "encoding": None, } def __init__( @@ -61,9 +62,10 @@ def __init__( self.keyword_processor = KeywordProcessor( case_sensitive=config["case_sensitive"] ) + self.encoding = config.get("encoding") for non_word_boundary in config["non_word_boundaries"]: self.keyword_processor.add_non_word_boundary(non_word_boundary) - words = pathlib.Path(self.path).read_text().split("\n") + words = pathlib.Path(self.path).read_text(encoding=self.encoding).split("\n") if len(words) == 0: rasa.shared.utils.io.raise_warning( f"No words found in the {pathlib.Path(self.path)} file." From 443ebec1142ee38adae6cff39138a4baf0e5569d Mon Sep 17 00:00:00 2001 From: bsantamaria Date: Fri, 28 Jan 2022 11:18:59 +0100 Subject: [PATCH 2/3] Remove class parameter encoding --- rasa_nlu_examples/extractors/flashtext_entity_extractor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rasa_nlu_examples/extractors/flashtext_entity_extractor.py b/rasa_nlu_examples/extractors/flashtext_entity_extractor.py index cad6746..98634fc 100644 --- a/rasa_nlu_examples/extractors/flashtext_entity_extractor.py +++ b/rasa_nlu_examples/extractors/flashtext_entity_extractor.py @@ -62,10 +62,9 @@ def __init__( self.keyword_processor = KeywordProcessor( case_sensitive=config["case_sensitive"] ) - self.encoding = config.get("encoding") for non_word_boundary in config["non_word_boundaries"]: self.keyword_processor.add_non_word_boundary(non_word_boundary) - words = pathlib.Path(self.path).read_text(encoding=self.encoding).split("\n") + words = pathlib.Path(self.path).read_text(encoding=config["encoding"]).split("\n") if len(words) == 0: rasa.shared.utils.io.raise_warning( f"No words found in the {pathlib.Path(self.path)} file." From 01a35624fe220d45825aeb6939aedde39df81756 Mon Sep 17 00:00:00 2001 From: bsantamaria Date: Mon, 31 Jan 2022 16:46:03 +0100 Subject: [PATCH 3/3] Add black style --- rasa_nlu_examples/extractors/flashtext_entity_extractor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rasa_nlu_examples/extractors/flashtext_entity_extractor.py b/rasa_nlu_examples/extractors/flashtext_entity_extractor.py index 98634fc..ced2e53 100644 --- a/rasa_nlu_examples/extractors/flashtext_entity_extractor.py +++ b/rasa_nlu_examples/extractors/flashtext_entity_extractor.py @@ -64,7 +64,9 @@ def __init__( ) for non_word_boundary in config["non_word_boundaries"]: self.keyword_processor.add_non_word_boundary(non_word_boundary) - words = pathlib.Path(self.path).read_text(encoding=config["encoding"]).split("\n") + words = ( + pathlib.Path(self.path).read_text(encoding=config["encoding"]).split("\n") + ) if len(words) == 0: rasa.shared.utils.io.raise_warning( f"No words found in the {pathlib.Path(self.path)} file."