-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8a867aa
commit ba17bd2
Showing
31 changed files
with
5,174 additions
and
4,173 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
import logging | ||
logging.getLogger("rdflib").setLevel(logging.WARNING) # avoid INFO console message on startup | ||
from holmes_extractor.manager import Manager as Manager | ||
from holmes_extractor.manager import MultiprocessingManager as MultiprocessingManager | ||
from holmes_extractor.ontology import Ontology as Ontology | ||
from holmes_extractor.manager import Manager | ||
from holmes_extractor.manager import MultiprocessingManager | ||
from holmes_extractor.ontology import Ontology |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,14 +14,16 @@ def _match_description(self, match_dict): | |
if match_dict['negated']: | ||
match_description_to_return = '; negated' | ||
if match_dict['uncertain']: | ||
match_description_to_return = ''.join((match_description_to_return, '; uncertain')) | ||
match_description_to_return = ''.join(( | ||
match_description_to_return, '; uncertain')) | ||
if match_dict['involves_coreference']: | ||
match_description_to_return = ''.join((match_description_to_return, | ||
'; involves coreference')) | ||
match_description_to_return = ''.join(( | ||
match_description_to_return, '; involves coreference')) | ||
overall_similarity_measure = float(match_dict['overall_similarity_measure']) | ||
if overall_similarity_measure < 1.0: | ||
match_description_to_return = ''.join((match_description_to_return, | ||
'; overall similarity measure=', str(overall_similarity_measure))) | ||
match_description_to_return = ''.join(( | ||
match_description_to_return, '; overall similarity measure=', | ||
str(overall_similarity_measure))) | ||
return match_description_to_return | ||
|
||
def _string_representation_of_word_match(self, word_match): | ||
|
@@ -30,8 +32,9 @@ def _string_representation_of_word_match(self, word_match): | |
extracted_word = ''.join(("(refers to '", word_match['extracted_word'], "')")) | ||
else: | ||
extracted_word = '' | ||
string = ''.join(("'", word_match['document_phrase'], "'", extracted_word, "->'", | ||
word_match['search_phrase_word'], "' (", word_match['match_type'])) | ||
string = ''.join(( | ||
"'", word_match['document_phrase'], "'", extracted_word, "->'", | ||
word_match['search_phrase_word'], "' (", word_match['match_type'])) | ||
if float(word_match['similarity_measure']) < 1.0: | ||
string = ''.join((string, ': ', word_match['similarity_measure'])) | ||
string = ''.join((string, ")")) | ||
|
@@ -42,7 +45,7 @@ def _common(self): | |
print("Holmes version 2.2 written by [email protected]") | ||
print("Language is", self._semantic_analyzer.language_name) | ||
print("Model is", self._semantic_analyzer.model) | ||
if self._structural_matcher.ontology == None: | ||
if self._structural_matcher.ontology is None: | ||
print("No ontology is being used") | ||
else: | ||
print("Ontology is", self._structural_matcher.ontology.path) | ||
|
@@ -58,7 +61,8 @@ def _common(self): | |
print("Derivational morphology analysis is ON") | ||
else: | ||
print("Derivational morphology analysis is OFF") | ||
print("Overall similarity threshold is", str( | ||
print( | ||
"Overall similarity threshold is", str( | ||
self._structural_matcher.overall_similarity_threshold)) | ||
if self._structural_matcher.overall_similarity_threshold < 1.0: | ||
if self._structural_matcher.embedding_based_matching_on_root_words: | ||
|
@@ -81,11 +85,11 @@ def start_chatbot_mode(self): | |
print(''.join(("Search phrase '", search_phrase.doc.text, "'"))) | ||
# only has an effect when debug==True | ||
self._semantic_analyzer.debug_structures(search_phrase.doc) | ||
if self._structural_matcher.ontology != None: | ||
if self._structural_matcher.ontology is not None: | ||
for token in search_phrase.matchable_tokens: | ||
lemma = token._.holmes.lemma | ||
matching_terms = self._structural_matcher.ontology.get_words_matching( | ||
lemma) | ||
lemma) | ||
if len(matching_terms) > 0: | ||
print(lemma, 'also matches', matching_terms) | ||
print() | ||
|
@@ -101,11 +105,12 @@ def start_chatbot_mode(self): | |
match_dicts = self._holmes.match_search_phrases_against(entry=search_sentence) | ||
for match_dict in match_dicts: | ||
print() | ||
print(''.join(("Matched search phrase '", | ||
match_dict['search_phrase'], "'", self._match_description(match_dict), | ||
":"))) | ||
word_matches_string = '; '.join( | ||
map(self._string_representation_of_word_match, match_dict['word_matches'])) | ||
print(''.join(( | ||
"Matched search phrase '", | ||
match_dict['search_phrase'], "'", self._match_description(match_dict), | ||
":"))) | ||
word_matches_string = '; '.join(map( | ||
self._string_representation_of_word_match, match_dict['word_matches'])) | ||
print(word_matches_string) | ||
|
||
def start_structural_search_mode(self): | ||
|
@@ -131,40 +136,52 @@ def start_structural_search_mode(self): | |
if search_phrase in ('exit', 'exit()', 'bye'): | ||
break | ||
print() | ||
match_dicts=[] | ||
match_dicts = [] | ||
try: | ||
match_dicts = self._holmes.match_documents_against(search_phrase_text=search_phrase) | ||
if len(match_dicts) == 0: | ||
print('No structural matching results were returned.') | ||
else: | ||
print('Structural matching results:') | ||
except SearchPhraseContainsNegationError: | ||
print('Structural matching was not attempted because the search phrase contained negation (not, never).') | ||
print( | ||
'Structural matching was not attempted because the search phrase contained '\ | ||
'negation (not, never).') | ||
print() | ||
except SearchPhraseContainsConjunctionError: | ||
print('Structural matching was not attempted because the search phrase contained conjunction (and, or).') | ||
print( | ||
'Structural matching was not attempted because the search phrase contained '\ | ||
'conjunction (and, or).') | ||
print() | ||
except SearchPhraseContainsCoreferringPronounError: | ||
print('Structural matching was not attempted because the search phrase contained a pronoun that referred back to a noun.') | ||
print( | ||
'Structural matching was not attempted because the search phrase contained a '\ | ||
'pronoun that referred back to a noun.') | ||
print() | ||
except SearchPhraseWithoutMatchableWordsError: | ||
print('Structural matching was not attempted because the search phrase did not contain any words that could be matched.') | ||
print( | ||
'Structural matching was not attempted because the search phrase did not '\ | ||
' contain any words that could be matched.') | ||
print() | ||
except SearchPhraseContainsMultipleClausesError: | ||
print('Structural matching was not attempted because the search phrase contained multiple clauses.') | ||
print( | ||
'Structural matching was not attempted because the search phrase contained '\ | ||
'multiple clauses.') | ||
print() | ||
print() | ||
for match_dict in match_dicts: | ||
print() | ||
print(''.join(("Matched document '", match_dict['document'], | ||
"' at index ", str(match_dict['index_within_document']), | ||
self._match_description(match_dict), ":"))) | ||
print(''.join(( | ||
"Matched document '", match_dict['document'], | ||
"' at index ", str(match_dict['index_within_document']), | ||
self._match_description(match_dict), ":"))) | ||
print(''.join(('"', match_dict['sentences_within_document'], '"'))) | ||
word_matches_string = '; '.join(map(self._string_representation_of_word_match, | ||
match_dict['word_matches'])) | ||
word_matches_string = '; '.join( | ||
map(self._string_representation_of_word_match, match_dict['word_matches'])) | ||
print(word_matches_string) | ||
|
||
def start_topic_matching_search_mode(self, only_one_result_per_document, | ||
def start_topic_matching_search_mode( | ||
self, only_one_result_per_document, | ||
maximum_number_of_single_word_matches_for_relation_matching, | ||
maximum_number_of_single_word_matches_for_embedding_matching): | ||
"""Starts a topic matching search mode console enabling the matching of pre-registered | ||
|
@@ -195,21 +212,20 @@ def start_topic_matching_search_mode(self, only_one_result_per_document, | |
break | ||
print() | ||
print('Performing topic matching ...') | ||
topic_matches = {} | ||
try: | ||
print() | ||
topic_match_dicts = \ | ||
self._holmes.topic_match_documents_returning_dictionaries_against( | ||
search_text, | ||
number_of_results = 5, | ||
only_one_result_per_document=only_one_result_per_document, | ||
maximum_number_of_single_word_matches_for_relation_matching = | ||
maximum_number_of_single_word_matches_for_relation_matching, | ||
maximum_number_of_single_word_matches_for_embedding_matching = | ||
maximum_number_of_single_word_matches_for_embedding_matching) | ||
search_text, | ||
number_of_results=5, | ||
only_one_result_per_document=only_one_result_per_document, | ||
maximum_number_of_single_word_matches_for_relation_matching= | ||
maximum_number_of_single_word_matches_for_relation_matching, | ||
maximum_number_of_single_word_matches_for_embedding_matching= | ||
maximum_number_of_single_word_matches_for_embedding_matching) | ||
except NoSearchPhraseError: | ||
pass | ||
if topic_match_dicts == None or len(topic_match_dicts) == 0: | ||
if topic_match_dicts is None or len(topic_match_dicts) == 0: | ||
print('No topic match results were returned.') | ||
print() | ||
continue | ||
|
@@ -218,7 +234,7 @@ def start_topic_matching_search_mode(self, only_one_result_per_document, | |
else: | ||
print('Topic matching results:') | ||
print() | ||
for index, topic_match_dict in enumerate(topic_match_dicts): | ||
for topic_match_dict in topic_match_dicts: | ||
output = ''.join(( | ||
topic_match_dict['rank'], | ||
'. Document ', | ||
|
@@ -231,8 +247,8 @@ def start_topic_matching_search_mode(self, only_one_result_per_document, | |
str(topic_match_dict['score']), | ||
':' | ||
)) | ||
print (output) | ||
print(output) | ||
print() | ||
print (topic_match_dict['text']) | ||
print(topic_match_dict['text']) | ||
print() | ||
print() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.